Skip to content

Commit

Permalink
Don't initialize new instance of FastParse for every ArithmeticParser
Browse files Browse the repository at this point in the history
Instead continuously use one predefined instance to first transform it into a tree structure and then evaluate that against the data
  • Loading branch information
NTPape committed Jan 27, 2017
1 parent 5d8674f commit 7a42395
Show file tree
Hide file tree
Showing 5 changed files with 143 additions and 168 deletions.
6 changes: 3 additions & 3 deletions app/tuktu/processors/arithmetics/ArithmeticProcessor.scala
Expand Up @@ -43,17 +43,17 @@ class ArithmeticAggregateProcessor(resultName: String) extends BaseProcessor(res
var calculate: String = _
var numberOfDecimals: Int = _
var doRounding: Boolean = _

override def initialize(config: JsObject) {
calculate = (config \ "calculate").as[String]
numberOfDecimals = (config \ "number_of_decimals").asOpt[Int].getOrElse(0)
doRounding = (config \ "do_rounding").asOpt[Boolean].getOrElse(false)
}

override def processor(): Enumeratee[DataPacket, DataPacket] = Enumeratee.mapM(data => Future {
// Compute on entire DataPacket
val formula = utils.evaluateTuktuString(calculate, data.data.headOption.getOrElse(Map.empty))
val res = new TuktuArithmeticsParser(data.data)(formula)
val res = ArithmeticParser(formula, data.data)
data.map { datum => datum + (resultName -> res) }
})
}
21 changes: 9 additions & 12 deletions app/tuktu/processors/bucket/aggregate/AggregateByValue.scala
Expand Up @@ -18,7 +18,7 @@ class AggregateByValueProcessor(resultName: String) extends BaseBucketProcessor(
var group: List[String] = _
// The base value for each distinct value; for count() this is probably 1; for everything else it probably is the value of the ${field} you want the expression to be executed on
var base: String = _
// The expression, most of the time it probably is just min(), max(), count(), etc. (see TuktuArithmeticsParser for available functions), but can be combined
// The expression, most of the time it probably is just min(), max(), count(), etc. (see ArithmeticParser for available functions), but can be combined
var expression: String = _
var evaluatedExpression: Option[String] = None

Expand Down Expand Up @@ -58,8 +58,8 @@ class AggregateByValueProcessor(resultName: String) extends BaseBucketProcessor(
// We have to get all the values for this key over all data
val baseValues = preprocess(data.data)

// Create the parse for this field
val parser = new TuktuArithmeticsParser(baseValues.map(_._2))
// Create the parser data for this field
val d = baseValues.map(_._2)

// Get all values
val allValues = baseValues map { _._1 } distinct
Expand All @@ -73,7 +73,7 @@ class AggregateByValueProcessor(resultName: String) extends BaseBucketProcessor(
}
}
// Peplace functions with field value names
val newExpression = parser.allowedFunctions.foldLeft(evaluatedExpression.get)((a, b) => {
val newExpression = ArithmeticParser.allowedFunctions.foldLeft(evaluatedExpression.get)((a, b) => {
a.replace(b + "()", b + "(" + JsString(jsStrings.mkString(",")).toString + ")")
})

Expand All @@ -82,35 +82,32 @@ class AggregateByValueProcessor(resultName: String) extends BaseBucketProcessor(
case Nil => current
case head :: tail => {
def helper(path: List[String], value: Any): Map[String, Any] = path match {
case head :: Nil => Map(head -> value)
case head :: Nil => Map(head -> value)
case head :: tail => Map(head -> helper(tail, value))
}
buildResult(tail, utils.mergeMap(current, helper(head._1.split('.').toList, head._2)))
}
}
buildResult(group.zip(values) ++ List(resultName -> parser(newExpression)))
buildResult(group.zip(values) ++ List(resultName -> ArithmeticParser(newExpression, d)))
}
})
})

override def doProcess(data: List[Map[String, Any]]): List[Map[String, Any]] = {
if (data.isEmpty) List()
else {
// Create the parser
val parser = new TuktuArithmeticsParser(data)

// Get all values
val allValues = data.flatMap(_.keys).distinct

// Compute stuff
List((for (value <- allValues) yield {
// Peplace functions with field value names
val newExpression = parser.allowedFunctions.foldLeft(expression)((a, b) => {
// Replace functions with field value names
val newExpression = ArithmeticParser.allowedFunctions.foldLeft(expression)((a, b) => {
a.replace(b + "()", b + "(" + value + ")")
})

// Evaluate string
value -> parser(newExpression)
value -> ArithmeticParser(newExpression, data)
}).toMap)
}
}
Expand Down
179 changes: 83 additions & 96 deletions modules/api/app/tuktu/api/Parsing/Parsing.scala
@@ -1,16 +1,25 @@
package tuktu.api.Parsing

import fastparse.WhitespaceApi
import play.api.libs.json.{ Json, JsArray, JsObject, JsString, JsNull, JsValue }
import tuktu.api.utils.{ fieldParser, nearlyEqual }
import play.api.libs.json._
import scala.util.Try
import tuktu.api.utils.{ fieldParser, nearlyEqual }
import tuktu.api.statistics.StatHelper
import fastparse.WhitespaceApi
import fastparse.all.NoTrace

/**
* Performs arithmetics over a string representation
*/
object ArithmeticParser {
// Tree structure
abstract class DoubleNode
case class DoubleLeaf(d: Double) extends DoubleNode
case class FunctionLeaf(function: String, parameter: String) extends DoubleNode
case class AddNode(base: DoubleNode, children: Seq[(String, DoubleNode)]) extends DoubleNode
case class MultNode(base: DoubleNode, children: Seq[(String, DoubleNode)]) extends DoubleNode
case class PowNode(seq: Seq[DoubleNode]) extends DoubleNode
case class NegateNode(base: DoubleNode) extends DoubleNode

val White = WhitespaceApi.Wrapper {
import fastparse.all._
NoTrace(" ".rep)
Expand All @@ -19,68 +28,59 @@ object ArithmeticParser {
import White._

// Allow all sorts of numbers, negative and scientific notation
val number: P[Double] = P(
(
// If we have a dot, we don't necessarily need a number before the dot
("-".? ~ CharIn('0' to '9').rep ~ "." ~ CharIn('0' to '9').rep(min = 1) |
// Otherwise, we need a number
"-".? ~ CharIn('0' to '9').rep(min = 1))
~ ("e" ~ "-".? ~ CharIn('0' to '9').rep(min = 1)).?).! map { _.toDouble })
val parens: P[Double] = P("-".!.? ~ "(" ~/ addSub ~ ")").map { case (neg, double) => if (neg.isDefined) -double else double }
val factor: P[Double] = P(parens | number)

val pow: P[Double] = P(factor ~ (CharIn("^") ~/ factor).rep).map(evalPower)
val divMul: P[Double] = P(pow ~ (CharIn("*/").! ~/ pow).rep).map(eval)
val addSub: P[Double] = P(divMul ~ (CharIn("+-").! ~/ divMul).rep).map(eval)
val expr: P[Double] = P(Start ~/ addSub ~ End)

def evalPower(tree: (Double, Seq[Double])): Double = {
def helper(list: List[Double]): Double = list match {
case Nil => 1
case a :: tail => Math.pow(a, helper(tail))
}
helper(tree._1 :: tree._2.toList)
}
def eval(tree: (Double, Seq[(String, Double)])): Double = {
val (base, ops) = tree
ops.foldLeft(base) {
case (left, (op, right)) => op match {
case "+" => left + right
case "-" => left - right
case "*" => left * right
case "/" => left / right
}
}
}

def apply(str: String): Double = {
expr.parse(str).get.value
}
}

/**
* Performs arithmetics and aggregations over entire DataPackets
*/
class TuktuArithmeticsParser(data: List[Map[String, Any]]) {
val White = WhitespaceApi.Wrapper {
import fastparse.all._
NoTrace(" ".rep)
}
import fastparse.noApi._
import White._
val number: P[DoubleLeaf] = P(
// If we have a dot, we don't necessarily need a number before the dot
("-".? ~ CharIn('0' to '9').rep ~ "." ~ CharIn('0' to '9').rep(min = 1) |
// Otherwise, we need a number
"-".? ~ CharIn('0' to '9').rep(min = 1))
~ ("e" ~ "-".? ~ CharIn('0' to '9').rep(min = 1)).?).!
.map { s => DoubleLeaf(s.toDouble) }
val parens: P[DoubleNode] = P("-".!.? ~ "(" ~/ addSub ~ ")")
.map { case (neg, n) => if (neg.isDefined) NegateNode(n) else n }
val factor: P[DoubleNode] = P(parens | number | functions)

// List of allowed functions
val allowedFunctions = List("count", "avg", "median", "sum", "max", "min", "stdev")

// Function parameter
val parameter: P[String] = P("\"" ~ ("\\\"" | CharPred(_ != '"')).rep ~ "\"").!.map {
str => Json.parse(str).as[String]
}

val parameter: P[String] = P("\"" ~ ("\\\"" | CharPred(_ != '"')).rep ~ "\"").!
.map { str => Json.parse(str).as[String] }
// All Tuktu-defined arithmetic functions
val functions: P[Double] = P(
StringIn(allowedFunctions: _*).! ~/ "(" ~/ (parameter | CharPred(_ != ')').rep.!) ~ ")").map {
case ("avg", field) => {
val functions: P[FunctionLeaf] = P(StringIn(allowedFunctions: _*).! ~/ "(" ~/ (parameter | CharPred(_ != ')').rep.!) ~ ")")
.map { case (func, param) => FunctionLeaf(func, param) }

// Operations
val pow: P[PowNode] = P(factor ~ (CharIn("^") ~/ factor).rep)
.map { case (base, seq) => PowNode(base +: seq) }
val divMul: P[MultNode] = P(pow ~ (CharIn("*/").! ~/ pow).rep)
.map { case (base, seq) => MultNode(base, seq) }
val addSub: P[AddNode] = P(divMul ~ (CharIn("+-").! ~/ divMul).rep)
.map { case (base, seq) => AddNode(base, seq) }
val expr: P[DoubleNode] = P(Start ~/ addSub ~ End)

// Evaluate the tree
def eval(d: DoubleNode)(implicit data: List[Map[String, Any]] = Nil): Double = d match {
case DoubleLeaf(d) => d
case AddNode(base, ops) =>
ops.foldLeft(eval(base)) {
case (acc, (op, current)) => op match {
case "+" => acc + eval(current)
case "-" => acc - eval(current)
}
}
case MultNode(base, ops) =>
ops.foldLeft(eval(base)) {
case (acc, (op, current)) => op match {
case "*" => acc * eval(current)
case "/" => acc / eval(current)
}
}
case PowNode(seq) =>
seq.foldRight(1d) {
case (current, acc) => Math.pow(eval(current), acc)
}
case NegateNode(n) => -eval(n)
case FunctionLeaf(f, field) => f match {
case "avg" =>
val (sum, count) = data.foldLeft(0.0, 0) {
case ((sum, count), datum) =>
val v = fieldParser(datum, field).map { StatHelper.anyToDouble(_) }
Expand All @@ -93,8 +93,7 @@ class TuktuArithmeticsParser(data: List[Map[String, Any]]) {
sum / count
else
0.0
}
case ("median", field) => {
case "median" =>
val sortedData = (for (datum <- data; v = fieldParser(datum, field) if v.isDefined) yield StatHelper.anyToDouble(v.get)).sorted

// Find the mid element
Expand All @@ -108,45 +107,30 @@ class TuktuArithmeticsParser(data: List[Map[String, Any]]) {
(sortedData(n1) + sortedData(n2)) / 2
} else
sortedData((n - 1) / 2)
}
case ("sum", field) => {
case "sum" =>
data.foldLeft(0.0) { (sum, datum) => sum + fieldParser(datum, field).map { StatHelper.anyToDouble(_) }.getOrElse(0.0) }
}
case ("max", field) => {
case "max" =>
data.foldLeft(Double.MinValue) { (max, datum) =>
val v = fieldParser(datum, field).map { StatHelper.anyToDouble(_) }.getOrElse(Double.MinValue)
if (v > max) v else max
}
}
case ("min", field) => {
case "min" =>
data.foldLeft(Double.MaxValue) { (min, datum) =>
val v = fieldParser(datum, field).map { StatHelper.anyToDouble(_) }.getOrElse(Double.MaxValue)
if (v < min) v else min
}
}
case ("stdev", field) => {
case "stdev" =>
// Get variance
val vars = StatHelper.getVariances(data, List(field))

// Sqrt them to get StDevs
vars.map(v => v._1 -> math.sqrt(v._2)).head._2
}
case ("count", field) => {
case "count" =>
data.count { datum => fieldParser(datum, field).isDefined }
}
}

val parens: P[Double] = P("-".!.? ~ "(" ~/ addSub ~ ")").map { case (neg, double) => if (neg.isDefined) -double else double }
val factor: P[Double] = P(parens | ArithmeticParser.number | functions)

val pow: P[Double] = P(factor ~ (CharIn("^") ~/ factor).rep).map(ArithmeticParser.evalPower)
val divMul: P[Double] = P(pow ~ (CharIn("*/").! ~/ pow).rep).map(ArithmeticParser.eval)
val addSub: P[Double] = P(divMul ~ (CharIn("+-").! ~/ divMul).rep).map(ArithmeticParser.eval)
val expr: P[Double] = P(Start ~/ addSub ~ End)

def apply(str: String): Double = {
expr.parse(str).get.value
}

def apply(str: String, data: List[Map[String, Any]] = Nil): Double = eval(expr.parse(str).get.value)(data)
}

/**
Expand All @@ -156,6 +140,7 @@ object PredicateParser {
// Tree structure
abstract class BooleanNode
case class BooleanLeaf(b: Boolean) extends BooleanNode
case class ArithmeticLeaf(left: ArithmeticParser.DoubleNode, op: String, right: ArithmeticParser.DoubleNode) extends BooleanNode
case class FunctionLeaf(function: String, parameter: String) extends BooleanNode
case class EqualsNode(node1: BooleanNode, operator: String, b2: BooleanNode) extends BooleanNode
case class AndNode(children: Seq[BooleanNode]) extends BooleanNode
Expand All @@ -174,17 +159,8 @@ object PredicateParser {
.map { case (neg, pred) => if (neg.size % 2 == 0) BooleanLeaf(pred.toBoolean) else BooleanLeaf(!pred.toBoolean) }

// Evaluate arithmetic expressions on numbers using the ArithmeticParser
val arithExpr: P[BooleanLeaf] = P(ArithmeticParser.addSub ~ (">=" | "<=" | "==" | "!=" | "<" | ">").! ~ ArithmeticParser.addSub)
.map {
case (left, op, right) => op match {
case "<" => left < right && !nearlyEqual(left, right)
case ">" => left > right && !nearlyEqual(left, right)
case "<=" => left < right || nearlyEqual(left, right)
case ">=" => left > right || nearlyEqual(left, right)
case "==" => nearlyEqual(left, right)
case "!=" => !nearlyEqual(left, right)
}
}.map { BooleanLeaf(_) }
val arithExpr: P[ArithmeticLeaf] = P(ArithmeticParser.addSub ~ (">=" | "<=" | "==" | "!=" | "<" | ">").! ~ ArithmeticParser.addSub)
.map { case (left, op, right) => ArithmeticLeaf(left, op, right) }

// Evaluate string expressions
val strings: P[String] = P(
Expand Down Expand Up @@ -221,14 +197,25 @@ object PredicateParser {

val expr: P[BooleanNode] = P(Start ~/ or ~ End)

def apply(str: String, datum: Map[String, Any]): Boolean = {
def apply(str: String, datum: Map[String, Any] = Map.empty): Boolean = {
def eval(b: BooleanNode): Boolean = b match {
case BooleanLeaf(b: Boolean) => b
case EqualsNode(n1, "==", n2) => eval(n1) == eval(n2)
case EqualsNode(n1, "!=", n2) => eval(n1) != eval(n2)
case AndNode(seq) => seq.forall { eval(_) }
case OrNode(seq) => seq.exists { eval(_) }
case NegateNode(n) => !eval(n)
case ArithmeticLeaf(left, op, right) =>
val l = ArithmeticParser.eval(left)
val r = ArithmeticParser.eval(right)
op match {
case "<" => l < r && !nearlyEqual(l, r)
case ">" => l > r && !nearlyEqual(l, r)
case "<=" => l < r || nearlyEqual(l, r)
case ">=" => l > r || nearlyEqual(l, r)
case "==" => nearlyEqual(l, r)
case "!=" => !nearlyEqual(l, r)
}
case FunctionLeaf(f, param) => f match {
case "containsFields" => param.split(',').forall { path =>
// Get the path and evaluate it against the datum
Expand Down

0 comments on commit 7a42395

Please sign in to comment.