Don't initialize a new FastParse instance for every ArithmeticParser
Instead, reuse one predefined parser instance: first parse the expression into a tree structure, then evaluate that tree against the data
NTPape committed Jan 27, 2017
1 parent 5d8674f commit 7a42395
Showing 5 changed files with 143 additions and 168 deletions.
app/tuktu/processors/arithmetics/ArithmeticProcessor.scala
Expand Up @@ -43,17 +43,17 @@ class ArithmeticAggregateProcessor(resultName: String) extends BaseProcessor(res
var calculate: String = _
var numberOfDecimals: Int = _
var doRounding: Boolean = _

override def initialize(config: JsObject) {
calculate = (config \ "calculate").as[String]
numberOfDecimals = (config \ "number_of_decimals").asOpt[Int].getOrElse(0)
doRounding = (config \ "do_rounding").asOpt[Boolean].getOrElse(false)

override def processor(): Enumeratee[DataPacket, DataPacket] = Enumeratee.mapM(data => Future {
// Compute on entire DataPacket
val formula = utils.evaluateTuktuString(calculate,
val res = new TuktuArithmeticsParser(
val res = ArithmeticParser(formula, { datum => datum + (resultName -> res) }
app/tuktu/processors/bucket/aggregate/AggregateByValue.scala
Expand Up @@ -18,7 +18,7 @@ class AggregateByValueProcessor(resultName: String) extends BaseBucketProcessor(
var group: List[String] = _
// The base value for each distinct value; for count() this is probably 1; for everything else it probably is the value of the ${field} you want the expression to be executed on
var base: String = _
// The expression, most of the time it probably is just min(), max(), count(), etc. (see TuktuArithmeticsParser for available functions), but can be combined
// The expression, most of the time it probably is just min(), max(), count(), etc. (see ArithmeticParser for available functions), but can be combined
var expression: String = _
var evaluatedExpression: Option[String] = None

Expand Down Expand Up @@ -58,8 +58,8 @@ class AggregateByValueProcessor(resultName: String) extends BaseBucketProcessor(
// We have to get all the values for this key over all data
val baseValues = preprocess(

// Create the parse for this field
val parser = new TuktuArithmeticsParser(
// Create the parser data for this field
val d =

// Get all values
val allValues = baseValues map { _._1 } distinct
Expand All @@ -73,7 +73,7 @@ class AggregateByValueProcessor(resultName: String) extends BaseBucketProcessor(
// Replace functions with field value names
val newExpression = parser.allowedFunctions.foldLeft(evaluatedExpression.get)((a, b) => {
val newExpression = ArithmeticParser.allowedFunctions.foldLeft(evaluatedExpression.get)((a, b) => {
a.replace(b + "()", b + "(" + JsString(jsStrings.mkString(",")).toString + ")")

Expand All @@ -82,35 +82,32 @@ class AggregateByValueProcessor(resultName: String) extends BaseBucketProcessor(
case Nil => current
case head :: tail => {
def helper(path: List[String], value: Any): Map[String, Any] = path match {
case head :: Nil => Map(head -> value)
case head :: Nil => Map(head -> value)
case head :: tail => Map(head -> helper(tail, value))
buildResult(tail, utils.mergeMap(current, helper(head._1.split('.').toList, head._2)))
buildResult( ++ List(resultName -> parser(newExpression)))
buildResult( ++ List(resultName -> ArithmeticParser(newExpression, d)))

override def doProcess(data: List[Map[String, Any]]): List[Map[String, Any]] = {
if (data.isEmpty) List()
else {
// Create the parser
val parser = new TuktuArithmeticsParser(data)

// Get all values
val allValues = data.flatMap(_.keys).distinct

// Compute stuff
List((for (value <- allValues) yield {
// Peplace functions with field value names
val newExpression = parser.allowedFunctions.foldLeft(expression)((a, b) => {
// Replace functions with field value names
val newExpression = ArithmeticParser.allowedFunctions.foldLeft(expression)((a, b) => {
a.replace(b + "()", b + "(" + value + ")")

// Evaluate string
value -> parser(newExpression)
value -> ArithmeticParser(newExpression, data)
Expand Down
modules/api/app/tuktu/api/Parsing/Parsing.scala
@@ -1,16 +1,25 @@
package tuktu.api.Parsing

import fastparse.WhitespaceApi
import play.api.libs.json.{ Json, JsArray, JsObject, JsString, JsNull, JsValue }
import tuktu.api.utils.{ fieldParser, nearlyEqual }
import play.api.libs.json._
import scala.util.Try
import tuktu.api.utils.{ fieldParser, nearlyEqual }
import tuktu.api.statistics.StatHelper
import fastparse.WhitespaceApi
import fastparse.all.NoTrace

* Performs arithmetics over a string representation
object ArithmeticParser {
// Tree structure: the parsers below build these nodes, and eval later computes a Double from them
/** Common base type of every node in a parsed arithmetic expression tree. */
abstract class DoubleNode
/** A numeric literal. */
case class DoubleLeaf(d: Double) extends DoubleNode
/** A call to one of the allowedFunctions with its parameter; eval uses the parameter as the field looked up in each datum. */
case class FunctionLeaf(function: String, parameter: String) extends DoubleNode
/** A base operand followed by ("+" or "-", operand) pairs, which eval folds from left to right. */
case class AddNode(base: DoubleNode, children: Seq[(String, DoubleNode)]) extends DoubleNode
/** A base operand followed by ("*" or "/", operand) pairs, which eval folds from left to right. */
case class MultNode(base: DoubleNode, children: Seq[(String, DoubleNode)]) extends DoubleNode
/** Operands joined by "^"; eval folds them from the right, so a ^ b ^ c is computed as a ^ (b ^ c). */
case class PowNode(seq: Seq[DoubleNode]) extends DoubleNode
/** Arithmetic negation of the wrapped node; the parens parser builds it for a parenthesised expression prefixed with "-". */
case class NegateNode(base: DoubleNode) extends DoubleNode

val White = WhitespaceApi.Wrapper {
import fastparse.all._
NoTrace(" ".rep)
Expand All @@ -19,68 +28,59 @@ object ArithmeticParser {
import White._

// Allow all sorts of numbers, negative and scientific notation
val number: P[Double] = P(
// If we have a dot, we don't necessarily need a number before the dot
("-".? ~ CharIn('0' to '9').rep ~ "." ~ CharIn('0' to '9').rep(min = 1) |
// Otherwise, we need a number
"-".? ~ CharIn('0' to '9').rep(min = 1))
~ ("e" ~ "-".? ~ CharIn('0' to '9').rep(min = 1)).?).! map { _.toDouble })
val parens: P[Double] = P("-".!.? ~ "(" ~/ addSub ~ ")").map { case (neg, double) => if (neg.isDefined) -double else double }
val factor: P[Double] = P(parens | number)

val pow: P[Double] = P(factor ~ (CharIn("^") ~/ factor).rep).map(evalPower)
val divMul: P[Double] = P(pow ~ (CharIn("*/").! ~/ pow).rep).map(eval)
val addSub: P[Double] = P(divMul ~ (CharIn("+-").! ~/ divMul).rep).map(eval)
val expr: P[Double] = P(Start ~/ addSub ~ End)

def evalPower(tree: (Double, Seq[Double])): Double = {
def helper(list: List[Double]): Double = list match {
case Nil => 1
case a :: tail => Math.pow(a, helper(tail))
helper(tree._1 :: tree._2.toList)
def eval(tree: (Double, Seq[(String, Double)])): Double = {
val (base, ops) = tree
ops.foldLeft(base) {
case (left, (op, right)) => op match {
case "+" => left + right
case "-" => left - right
case "*" => left * right
case "/" => left / right

def apply(str: String): Double = {

* Performs arithmetics and aggregations over entire DataPackets
class TuktuArithmeticsParser(data: List[Map[String, Any]]) {
val White = WhitespaceApi.Wrapper {
import fastparse.all._
NoTrace(" ".rep)
import fastparse.noApi._
import White._
val number: P[DoubleLeaf] = P(
// If we have a dot, we don't necessarily need a number before the dot
("-".? ~ CharIn('0' to '9').rep ~ "." ~ CharIn('0' to '9').rep(min = 1) |
// Otherwise, we need a number
"-".? ~ CharIn('0' to '9').rep(min = 1))
~ ("e" ~ "-".? ~ CharIn('0' to '9').rep(min = 1)).?).!
.map { s => DoubleLeaf(s.toDouble) }
val parens: P[DoubleNode] = P("-".!.? ~ "(" ~/ addSub ~ ")")
.map { case (neg, n) => if (neg.isDefined) NegateNode(n) else n }
val factor: P[DoubleNode] = P(parens | number | functions)

// List of allowed functions
val allowedFunctions = List("count", "avg", "median", "sum", "max", "min", "stdev")

// Function parameter
val parameter: P[String] = P("\"" ~ ("\\\"" | CharPred(_ != '"')).rep ~ "\"").!.map {
str => Json.parse(str).as[String]

val parameter: P[String] = P("\"" ~ ("\\\"" | CharPred(_ != '"')).rep ~ "\"").!
.map { str => Json.parse(str).as[String] }
// All Tuktu-defined arithmetic functions
val functions: P[Double] = P(
StringIn(allowedFunctions: _*).! ~/ "(" ~/ (parameter | CharPred(_ != ')').rep.!) ~ ")").map {
case ("avg", field) => {
val functions: P[FunctionLeaf] = P(StringIn(allowedFunctions: _*).! ~/ "(" ~/ (parameter | CharPred(_ != ')').rep.!) ~ ")")
.map { case (func, param) => FunctionLeaf(func, param) }

// Operations
val pow: P[PowNode] = P(factor ~ (CharIn("^") ~/ factor).rep)
.map { case (base, seq) => PowNode(base +: seq) }
val divMul: P[MultNode] = P(pow ~ (CharIn("*/").! ~/ pow).rep)
.map { case (base, seq) => MultNode(base, seq) }
val addSub: P[AddNode] = P(divMul ~ (CharIn("+-").! ~/ divMul).rep)
.map { case (base, seq) => AddNode(base, seq) }
val expr: P[DoubleNode] = P(Start ~/ addSub ~ End)

// Evaluate the tree
def eval(d: DoubleNode)(implicit data: List[Map[String, Any]] = Nil): Double = d match {
case DoubleLeaf(d) => d
case AddNode(base, ops) =>
ops.foldLeft(eval(base)) {
case (acc, (op, current)) => op match {
case "+" => acc + eval(current)
case "-" => acc - eval(current)
case MultNode(base, ops) =>
ops.foldLeft(eval(base)) {
case (acc, (op, current)) => op match {
case "*" => acc * eval(current)
case "/" => acc / eval(current)
case PowNode(seq) =>
seq.foldRight(1d) {
case (current, acc) => Math.pow(eval(current), acc)
case NegateNode(n) => -eval(n)
case FunctionLeaf(f, field) => f match {
case "avg" =>
val (sum, count) = data.foldLeft(0.0, 0) {
case ((sum, count), datum) =>
val v = fieldParser(datum, field).map { StatHelper.anyToDouble(_) }
Expand All @@ -93,8 +93,7 @@ class TuktuArithmeticsParser(data: List[Map[String, Any]]) {
sum / count
case ("median", field) => {
case "median" =>
val sortedData = (for (datum <- data; v = fieldParser(datum, field) if v.isDefined) yield StatHelper.anyToDouble(v.get)).sorted

// Find the mid element
Expand All @@ -108,45 +107,30 @@ class TuktuArithmeticsParser(data: List[Map[String, Any]]) {
(sortedData(n1) + sortedData(n2)) / 2
} else
sortedData((n - 1) / 2)
case ("sum", field) => {
case "sum" =>
data.foldLeft(0.0) { (sum, datum) => sum + fieldParser(datum, field).map { StatHelper.anyToDouble(_) }.getOrElse(0.0) }
case ("max", field) => {
case "max" =>
data.foldLeft(Double.MinValue) { (max, datum) =>
val v = fieldParser(datum, field).map { StatHelper.anyToDouble(_) }.getOrElse(Double.MinValue)
if (v > max) v else max
case ("min", field) => {
case "min" =>
data.foldLeft(Double.MaxValue) { (min, datum) =>
val v = fieldParser(datum, field).map { StatHelper.anyToDouble(_) }.getOrElse(Double.MaxValue)
if (v < min) v else min
case ("stdev", field) => {
case "stdev" =>
// Get variance
val vars = StatHelper.getVariances(data, List(field))

// Sqrt them to get StDevs => v._1 -> math.sqrt(v._2)).head._2
case ("count", field) => {
case "count" =>
data.count { datum => fieldParser(datum, field).isDefined }

val parens: P[Double] = P("-".!.? ~ "(" ~/ addSub ~ ")").map { case (neg, double) => if (neg.isDefined) -double else double }
val factor: P[Double] = P(parens | ArithmeticParser.number | functions)

val pow: P[Double] = P(factor ~ (CharIn("^") ~/ factor).rep).map(ArithmeticParser.evalPower)
val divMul: P[Double] = P(pow ~ (CharIn("*/").! ~/ pow).rep).map(ArithmeticParser.eval)
val addSub: P[Double] = P(divMul ~ (CharIn("+-").! ~/ divMul).rep).map(ArithmeticParser.eval)
val expr: P[Double] = P(Start ~/ addSub ~ End)

def apply(str: String): Double = {

/**
 * Parses an arithmetic expression with `expr` and evaluates the resulting tree.
 *
 * @param str  the expression to parse
 * @param data the data handed to `eval`, over which function nodes (avg, sum, ...) are computed; defaults to Nil
 * @return the value `eval` computes for the parsed tree
 *
 * NOTE(review): `.get` is called directly on the parse result, so an expression that fails to parse
 * presumably throws rather than returning a value -- confirm against the FastParse version in use.
 */
def apply(str: String, data: List[Map[String, Any]] = Nil): Double = eval(expr.parse(str).get.value)(data)

Expand All @@ -156,6 +140,7 @@ object PredicateParser {
// Tree structure
abstract class BooleanNode
case class BooleanLeaf(b: Boolean) extends BooleanNode
case class ArithmeticLeaf(left: ArithmeticParser.DoubleNode, op: String, right: ArithmeticParser.DoubleNode) extends BooleanNode
case class FunctionLeaf(function: String, parameter: String) extends BooleanNode
case class EqualsNode(node1: BooleanNode, operator: String, b2: BooleanNode) extends BooleanNode
case class AndNode(children: Seq[BooleanNode]) extends BooleanNode
Expand All @@ -174,17 +159,8 @@ object PredicateParser {
.map { case (neg, pred) => if (neg.size % 2 == 0) BooleanLeaf(pred.toBoolean) else BooleanLeaf(!pred.toBoolean) }

// Evaluate arithmetic expressions on numbers using the ArithmeticParser
val arithExpr: P[BooleanLeaf] = P(ArithmeticParser.addSub ~ (">=" | "<=" | "==" | "!=" | "<" | ">").! ~ ArithmeticParser.addSub)
.map {
case (left, op, right) => op match {
case "<" => left < right && !nearlyEqual(left, right)
case ">" => left > right && !nearlyEqual(left, right)
case "<=" => left < right || nearlyEqual(left, right)
case ">=" => left > right || nearlyEqual(left, right)
case "==" => nearlyEqual(left, right)
case "!=" => !nearlyEqual(left, right)
}.map { BooleanLeaf(_) }
val arithExpr: P[ArithmeticLeaf] = P(ArithmeticParser.addSub ~ (">=" | "<=" | "==" | "!=" | "<" | ">").! ~ ArithmeticParser.addSub)
.map { case (left, op, right) => ArithmeticLeaf(left, op, right) }

// Evaluate string expressions
val strings: P[String] = P(
Expand Down Expand Up @@ -221,14 +197,25 @@ object PredicateParser {

val expr: P[BooleanNode] = P(Start ~/ or ~ End)

def apply(str: String, datum: Map[String, Any]): Boolean = {
def apply(str: String, datum: Map[String, Any] = Map.empty): Boolean = {
def eval(b: BooleanNode): Boolean = b match {
case BooleanLeaf(b: Boolean) => b
case EqualsNode(n1, "==", n2) => eval(n1) == eval(n2)
case EqualsNode(n1, "!=", n2) => eval(n1) != eval(n2)
case AndNode(seq) => seq.forall { eval(_) }
case OrNode(seq) => seq.exists { eval(_) }
case NegateNode(n) => !eval(n)
case ArithmeticLeaf(left, op, right) =>
val l = ArithmeticParser.eval(left)
val r = ArithmeticParser.eval(right)
op match {
case "<" => l < r && !nearlyEqual(l, r)
case ">" => l > r && !nearlyEqual(l, r)
case "<=" => l < r || nearlyEqual(l, r)
case ">=" => l > r || nearlyEqual(l, r)
case "==" => nearlyEqual(l, r)
case "!=" => !nearlyEqual(l, r)
case FunctionLeaf(f, param) => f match {
case "containsFields" => param.split(',').forall { path =>
// Get the path and evaluate it against the datum
Expand Down

