Skip to content

Commit

Permalink
remove traversal, attempt 3 (#2784)
Browse files Browse the repository at this point in the history
The third attempt at removing Traversal, this time with a detailed migration guide in a new changelog. Please do read the `News.md` file. The thing may be too verbose?

This PR is WIP in the sense that its upstream is not in yet -- I think we first need to review how happy we are with the documentation and the amount of breakage (we might want to change some upstream things to further improve compat here?).
  • Loading branch information
bbrehm committed Jun 2, 2023
1 parent 4a098ff commit f74cc7a
Show file tree
Hide file tree
Showing 132 changed files with 592 additions and 395 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ Documentation: https://docs.joern.io/

Specification: https://cpg.joern.io

## News / Changelog

- Joern v2.0.0 removes the `overflowdb.traversal.Traversal` class. This change is not completely backwards compatible. See [here](changelog/traversal_removal.md) for a detailed writeup.

## Requirements

- JDK 19 (other versions _might_ work, but have not been properly tested)
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/build.sbt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name := "benchmarks"

crossScalaVersions := Seq("2.13.8", "3.2.2")
crossScalaVersions := Seq("2.13.8", "3.3.0")

dependsOn(Projects.dataflowengineoss)
dependsOn(Projects.semanticcpg)
Expand Down
2 changes: 1 addition & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name := "joern"
ThisBuild / organization := "io.joern"
ThisBuild / scalaVersion := "2.13.8"

val cpgVersion = "1.3.600"
val cpgVersion = "1.3.612"

lazy val joerncli = Projects.joerncli
lazy val querydb = Projects.querydb
Expand Down
279 changes: 279 additions & 0 deletions changelog/traversal_removal.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion console/src/main/scala/io/joern/console/scan/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ package object scan {

implicit class ScannerStarters(val cpg: Cpg) extends AnyVal {
def finding: Traversal[Finding] =
cpg.graph.nodes(NodeTypes.FINDING).cast[Finding]
overflowdb.traversal.InitialTraversal.from[Finding](cpg.graph, NodeTypes.FINDING)
}

implicit class QueryWrapper(q: Query) {
Expand Down
2 changes: 1 addition & 1 deletion dataflowengineoss/build.sbt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name := "dataflowengineoss"

crossScalaVersions := Seq("2.13.8", "3.2.2")
crossScalaVersions := Seq("2.13.8", "3.3.0")

dependsOn(Projects.semanticcpg, Projects.x2cpg)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import io.joern.dataflowengineoss.queryengine.{
import io.joern.dataflowengineoss.semanticsloader.Semantics
import io.shiftleft.codepropertygraph.generated.nodes._
import io.shiftleft.semanticcpg.language._
import overflowdb.traversal.Traversal
import scala.collection.mutable
import scala.collection.parallel.CollectionConverters._

Expand All @@ -35,14 +34,16 @@ class ExtendedCfgNode(val traversal: Traversal[CfgNode]) extends AnyVal {
result
}

def reachableBy[NodeType](sourceTravs: Traversal[NodeType]*)(implicit context: EngineContext): Traversal[NodeType] = {
def reachableBy[NodeType](
sourceTravs: IterableOnce[NodeType]*
)(implicit context: EngineContext): Traversal[NodeType] = {
val sources = sourceTravsToStartingPoints(sourceTravs: _*)
val reachedSources =
reachableByInternal(sources).map(_.path.head.node)
Traversal.from(reachedSources).cast[NodeType]
reachedSources.cast[NodeType]
}

def reachableByFlows[A](sourceTravs: Traversal[A]*)(implicit context: EngineContext): Traversal[Path] = {
def reachableByFlows[A](sourceTravs: IterableOnce[A]*)(implicit context: EngineContext): Traversal[Path] = {
val sources = sourceTravsToStartingPoints(sourceTravs: _*)
val startingPoints = sources.map(_.startingPoint)
val paths = reachableByInternal(sources).par
Expand All @@ -62,7 +63,7 @@ class ExtendedCfgNode(val traversal: Traversal[CfgNode]) extends AnyVal {
.dedup
.flatten
.toVector
paths.to(Traversal)
paths.iterator
}

def reachableByDetailed[NodeType](
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@ import io.joern.dataflowengineoss.dotgenerator.{DotCpg14Generator, DotDdgGenerat
import io.joern.dataflowengineoss.language._
import io.joern.dataflowengineoss.semanticsloader.Semantics
import io.shiftleft.semanticcpg.language.dotextension.{ImageViewer, Shared}
import overflowdb.traversal.Traversal

import io.shiftleft.semanticcpg.language._
class DdgNodeDot(val traversal: Traversal[Method]) extends AnyVal {

def dotDdg(implicit semantics: Semantics = DefaultSemantics()): Traversal[String] =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package io.joern.dataflowengineoss.language.nodemethods
import io.joern.dataflowengineoss.semanticsloader._
import io.shiftleft.codepropertygraph.generated.nodes.{Call, Expression, Method}
import io.shiftleft.semanticcpg.language._
import overflowdb.traversal.Traversal

class ExpressionMethods[NodeType <: Expression](val node: NodeType) extends AnyVal {

Expand Down Expand Up @@ -40,7 +39,7 @@ class ExpressionMethods[NodeType <: Expression](val node: NodeType) extends AnyV
* true if these nodes are arguments to the same call, false if otherwise.
*/
def isArgToSameCallWith(other: Expression): Boolean =
node.astParent.collectAll[Call].headOption.equals(other.astParent.collectAll[Call].headOption)
node.astParent.start.collectAll[Call].headOption.equals(other.astParent.start.collectAll[Call].headOption)

/** Determines if this node has a flow to the given target node in the defined semantics.
* @param tgt
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import io.joern.dataflowengineoss.queryengine.{Engine, EngineContext, PathElemen
import io.joern.dataflowengineoss.semanticsloader.Semantics
import io.shiftleft.semanticcpg.language.{toExpressionMethods, _}
import io.shiftleft.semanticcpg.utils.MemberAccess
import overflowdb.traversal.{Traversal, _}

import scala.collection.mutable
import scala.jdk.CollectionConverters._
Expand All @@ -18,7 +17,9 @@ class ExtendedCfgNodeMethods[NodeType <: CfgNode](val node: NodeType) extends An
*/
def astNode: AstNode = node

def reachableBy[NodeType](sourceTravs: Traversal[NodeType]*)(implicit context: EngineContext): Traversal[NodeType] =
def reachableBy[NodeType](sourceTravs: IterableOnce[NodeType]*)(implicit
context: EngineContext
): Traversal[NodeType] =
node.start.reachableBy(sourceTravs: _*)

def ddgIn(implicit semantics: Semantics = DefaultSemantics()): Traversal[CfgNode] = {
Expand Down Expand Up @@ -61,7 +62,7 @@ class ExtendedCfgNodeMethods[NodeType <: CfgNode](val node: NodeType) extends An
withInvisible: Boolean,
cache: mutable.HashMap[CfgNode, Vector[PathElement]]
)(implicit semantics: Semantics): Traversal[PathElement] = {
val result = ddgInPathElemInternal(path, withInvisible, cache).to(Traversal)
val result = ddgInPathElemInternal(path, withInvisible, cache).iterator
result
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ package object language {
new ExpressionMethods(node)

implicit def toExtendedCfgNode[NodeType <: CfgNode](traversal: IterableOnce[NodeType]): ExtendedCfgNode =
new ExtendedCfgNode(traversal)
new ExtendedCfgNode(traversal.iterator)

implicit def toDdgNodeDot(traversal: IterableOnce[Method]): DdgNodeDot =
new DdgNodeDot(traversal)
new DdgNodeDot(traversal.iterator)

implicit def toDdgNodeDotSingle(method: Method): DdgNodeDot =
new DdgNodeDot(Traversal.fromSingle(method))
new DdgNodeDot(method.start)

}
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,10 @@ package io.joern

import io.shiftleft.codepropertygraph.generated.nodes.{Declaration, Expression, Identifier, Literal}
import io.shiftleft.semanticcpg.language._
import overflowdb.traversal.Traversal

package object dataflowengineoss {

def globalFromLiteral(lit: Literal): Traversal[Expression] = lit
def globalFromLiteral(lit: Literal): Traversal[Expression] = lit.start
.where(_.inAssignment.method.nameExact("<module>", ":package"))
.inAssignment
.argument(1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import io.joern.dataflowengineoss.queryengine.Engine.isOutputArgOfInternalMethod
import io.joern.dataflowengineoss.semanticsloader.{ParamMapping, PosArg, Semantics}
import io.shiftleft.codepropertygraph.generated.nodes.{Call, CfgNode, Expression, StoredNode}
import io.shiftleft.semanticcpg.language._
import overflowdb.traversal._

object EdgeValidator {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ object Engine {
val sameCallSite = parentNode.inCall.l == childNode.start.inCall.l
val visible = if (sameCallSite) {
val semanticExists = parentNode.semanticsForCallByArg.nonEmpty
val internalMethodsForCall = parentNodeCall.flatMap(methodsForCall).to(Traversal).internal
val internalMethodsForCall = parentNodeCall.flatMap(methodsForCall).internal
(semanticExists && parentNode.isDefined) || internalMethodsForCall.isEmpty
} else {
parentNode.isDefined
Expand All @@ -241,11 +241,7 @@ object Engine {
def isOutputArgOfInternalMethod(arg: Expression)(implicit semantics: Semantics): Boolean = {
arg.inCall.l match {
case List(call) =>
methodsForCall(call)
.to(Traversal)
.internal
.isNotStub
.nonEmpty && semanticsForCall(call).isEmpty
methodsForCall(call).internal.isNotStub.nonEmpty && semanticsForCall(call).isEmpty
case _ =>
false
}
Expand All @@ -272,9 +268,7 @@ object Engine {
}

def argToOutputParams(arg: Expression): Traversal[MethodParameterOut] = {
argToMethods(arg)
.to(Traversal)
.parameter
argToMethods(arg).parameter
.index(arg.argumentIndex)
.asOutput
}
Expand All @@ -290,10 +284,7 @@ object Engine {
}

def isCallToInternalMethod(call: Call): Boolean = {
methodsForCall(call)
.to(Traversal)
.internal
.nonEmpty
methodsForCall(call).internal.nonEmpty
}
def isCallToInternalMethodWithoutSemantic(call: Call)(implicit semantics: Semantics): Boolean = {
isCallToInternalMethod(call) && semanticsForCall(call).isEmpty
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ object SourcesToStartingPoints {

private val log = LoggerFactory.getLogger(SourcesToStartingPoints.getClass)

def sourceTravsToStartingPoints[NodeType](sourceTravs: Traversal[NodeType]*): List[StartingPointWithSource] = {
def sourceTravsToStartingPoints[NodeType](sourceTravs: IterableOnce[NodeType]*): List[StartingPointWithSource] = {
val fjp = ForkJoinPool.commonPool()
try {
fjp.invoke(new SourceTravsToStartingPointsTask(sourceTravs: _*)).distinct
Expand All @@ -34,14 +34,14 @@ object SourcesToStartingPoints {

}

class SourceTravsToStartingPointsTask[NodeType](sourceTravs: Traversal[NodeType]*)
class SourceTravsToStartingPointsTask[NodeType](sourceTravs: IterableOnce[NodeType]*)
extends RecursiveTask[List[StartingPointWithSource]] {

private val log = LoggerFactory.getLogger(this.getClass)

override def compute(): List[StartingPointWithSource] = {
val sources: List[StoredNode] = sourceTravs
.flatMap(_.toList)
.flatMap(_.iterator.toList)
.collect { case n: StoredNode => n }
.dedup
.toList
Expand Down Expand Up @@ -119,7 +119,7 @@ class SourceToStartingPoints(src: StoredNode) extends RecursiveTask[List[CfgNode
x.argument(2).isFieldIdentifier.canonicalNameExact(identifier.name)
case fieldIdentifier: FieldIdentifier =>
x.argument(2).isFieldIdentifier.canonicalNameExact(fieldIdentifier.canonicalName)
case _ => List()
case _ => Iterator.empty
}
}
.takeWhile(notLeftHandOfAssignment)
Expand Down Expand Up @@ -197,7 +197,7 @@ class SourceToStartingPoints(src: StoredNode) extends RecursiveTask[List[CfgNode
}

private def isTargetInAssignment(identifier: Identifier): List[Identifier] = {
Traversal(identifier).argumentIndex(1).where(_.inAssignment).l
identifier.start.argumentIndex(1).where(_.inAssignment).l
}

private def notLeftHandOfAssignment(x: Expression): Boolean = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.codepropertygraph.generated.nodes.{Call, Expression, MethodParameterIn, MethodParameterOut, Return}
import io.shiftleft.semanticcpg.language.NoResolve
import io.shiftleft.semanticcpg.language._
import overflowdb.traversal.{NodeOps, Traversal}

/** Creation of new tasks from results of completed tasks.
*/
Expand Down Expand Up @@ -72,7 +71,6 @@ class TaskCreator(context: EngineContext) {
private def paramToArgsOfCallers(param: MethodParameterIn): List[Expression] =
NoResolve
.getMethodCallsites(param.method)
.to(Traversal)
.collectAll[Call]
.argument(param.index)
.l
Expand All @@ -99,10 +97,10 @@ class TaskCreator(context: EngineContext) {

val methodReturns = outCall.toList
.flatMap(x => NoResolve.getCalledMethods(x).methodReturn.map(y => (x, y)))
.to(Traversal)
.iterator

methodReturns.flatMap { case (call, methodReturn) =>
val method = methodReturn.method.head
val method = methodReturn.method
val returnStatements = methodReturn._reachingDefIn.toList.collect { case r: Return => r }
if (method.isExternal || method.start.isStub.nonEmpty) {
val newPath = path
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package io.joern.dataflowengineoss.queryengine
import io.joern.dataflowengineoss.queryengine.QueryEngineStatistics.{PATH_CACHE_HITS, PATH_CACHE_MISSES}
import io.joern.dataflowengineoss.semanticsloader.Semantics
import io.shiftleft.codepropertygraph.generated.nodes._
import io.shiftleft.semanticcpg.language.{toCfgNodeMethods, toExpressionMethods}
import io.shiftleft.semanticcpg.language.{toCfgNodeMethods, toExpressionMethods, _}

import java.util.concurrent.Callable
import scala.collection.mutable
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.codepropertygraph.generated.nodes._
import io.shiftleft.codepropertygraph.generated.{Operators, PropertyNames}
import io.shiftleft.semanticcpg.language._
import overflowdb.traversal.Traversal

import java.util.concurrent.atomic.AtomicBoolean
import java.util.concurrent.{ForkJoinPool, RecursiveTask}
Expand Down Expand Up @@ -104,7 +103,7 @@ object UsageSlicing {
.nameExact("<operator>.new")
.lastOption
.map(_.argument)
.getOrElse(Traversal.empty)
.getOrElse(Iterator.empty)
else baseCall.argument)
.collect { case n: Expression if n.argumentIndex > 0 => n }
.flatMap {
Expand Down
2 changes: 1 addition & 1 deletion joern-cli/frontends/c2cpg/build.sbt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name := "c2cpg"
scalaVersion := "2.13.8"
crossScalaVersions := Seq("2.13.8", "3.2.2")
crossScalaVersions := Seq("2.13.8", "3.3.0")

dependsOn(Projects.semanticcpg, Projects.dataflowengineoss % Test, Projects.x2cpg % "compile->compile;test->test")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -757,9 +757,9 @@ class AstCreationPassTests extends AbstractPassTest {
val List(expressionListCall) = bracketedPrimaryCall.argument.isCall.l
expressionListCall.name shouldBe "<operator>.expressionList"

val List(arg1) = expressionListCall.argument(1).collectAll[Call].l
val List(arg1) = expressionListCall.argument(1).start.collectAll[Call].l
arg1.code shouldBe "__sync_synchronize()"
val List(arg2) = expressionListCall.argument(2).collectAll[Call].l
val List(arg2) = expressionListCall.argument(2).start.collectAll[Call].l
arg2.code shouldBe "foo(x)"
}

Expand Down Expand Up @@ -1688,8 +1688,8 @@ class AstCreationPassTests extends AbstractPassTest {
call2.argument.code.l shouldBe List("2", "10")
call3.code shouldBe "[3 ... 9] = 15"
call3.name shouldBe Operators.assignment
val List(desCall) = call3.argument(1).collectAll[Call].l
val List(value) = call3.argument(2).collectAll[Literal].l
val List(desCall) = call3.argument(1).start.collectAll[Call].l
val List(value) = call3.argument(2).start.collectAll[Literal].l
value.code shouldBe "15"
desCall.name shouldBe Operators.arrayInitializer
desCall.code shouldBe "[3 ... 9]"
Expand Down Expand Up @@ -1727,8 +1727,8 @@ class AstCreationPassTests extends AbstractPassTest {
call2.argument.code.l shouldBe List("2", "10")
call3.code shouldBe "[3 ... 9] = 15"
call3.name shouldBe Operators.assignment
val List(desCall) = call3.argument(1).collectAll[Call].l
val List(value) = call3.argument(2).collectAll[Literal].l
val List(desCall) = call3.argument(1).start.collectAll[Call].l
val List(value) = call3.argument(2).start.collectAll[Literal].l
value.code shouldBe "15"
desCall.name shouldBe Operators.arrayInitializer
desCall.code shouldBe "[3 ... 9]"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import io.joern.c2cpg.testfixtures.CCodeToCpgSuite
import io.shiftleft.codepropertygraph.generated.{Languages, NodeTypes}
import io.shiftleft.semanticcpg.language._
import io.shiftleft.semanticcpg.language.types.structure.NamespaceTraversal
import overflowdb.traversal._

/** The following tests show in detail how queries can be started. For all node types, for which it seems reasonable,
* all nodes of that type can be used as a starting point, e.g., `cpg.method` starts at all methods while `cpg.local`
Expand Down
2 changes: 1 addition & 1 deletion joern-cli/frontends/javasrc2cpg/build.sbt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name := "javasrc2cpg"

scalaVersion := "2.13.8"
crossScalaVersions := Seq("2.13.8", "3.2.2")
crossScalaVersions := Seq("2.13.8", "3.3.0")

dependsOn(Projects.dataflowengineoss, Projects.x2cpg % "compile->compile;test->test")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ class NewCallTests extends JavaSrcCode2CpgFixture {
.argument(0)
.l match {
case List(thisNode: Identifier) =>
thisNode.outE.collectAll[Ref].map(_.inNode).l match {
thisNode._refOut.l match {
case List(paramNode: MethodParameterIn) =>
paramNode.name shouldBe "this"
paramNode.method.fullName shouldBe s"Foo.${io.joern.x2cpg.Defines.ConstructorMethodName}:void()"
Expand Down
Loading

0 comments on commit f74cc7a

Please sign in to comment.