Skip to content

Commit

Permalink
Separate Joern Slice Modes into Commands (#2817)
Browse files Browse the repository at this point in the history
* Using `scopt`'s `cmd` feature to separate modes into commands
* Added `BaseConfig` trait to seamlessly separate configs between commands
* Added early stopping for invalid options
  • Loading branch information
DavidBakerEffendi committed Jun 6, 2023
1 parent 421c19a commit ce33885
Show file tree
Hide file tree
Showing 5 changed files with 143 additions and 90 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ import scala.collection.mutable.ArrayBuffer

object DataFlowSlicing {

def calculateDataFlowSlice(cpg: Cpg, config: SliceConfig): ProgramDataFlowSlice = {
val sliceMapping = (config.sourceFile match {
def calculateDataFlowSlice(cpg: Cpg, config: DataFlowConfig): ProgramDataFlowSlice = {
val sliceMapping = (config.fileFilter match {
case Some(fileName) => cpg.file.nameExact(fileName).ast.isCall
case None => cpg.call
}).toBuffer.groupBy[Method] { _.method }.map { case (m: Method, calls: ArrayBuffer[Call]) =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,15 @@ object UsageSlicing {
* @return
* a set of object slices.
*/
def calculateUsageSlice(cpg: Cpg, config: SliceConfig): ProgramSlice = {
def calculateUsageSlice(cpg: Cpg, config: UsagesConfig): ProgramSlice = {
excludeOperatorCalls.set(config.excludeOperatorCalls)

def getAssignmentDecl: Traversal[Declaration] = (config.sourceFile match {
def getAssignmentDecl: Traversal[Declaration] = (config.fileFilter match {
case Some(fileName) => cpg.file.nameExact(fileName).assignment
case None => cpg.assignment
}).argument(1).isIdentifier.refsTo

def getParameterDecl: Traversal[MethodParameterIn] = config.sourceFile match {
def getParameterDecl: Traversal[MethodParameterIn] = config.fileFilter match {
case Some(fileName) => cpg.file.nameExact(fileName).ast.isParameter
case None => cpg.parameter
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package io.joern.dataflowengineoss

import better.files.File
import io.circe.{Decoder, Encoder, HCursor, Json}
import io.joern.dataflowengineoss.slicing.SliceMode.SliceModes
import io.shiftleft.codepropertygraph.generated.PropertyNames
import io.shiftleft.codepropertygraph.generated.nodes._

Expand All @@ -11,23 +10,39 @@ package object slicing {
import io.circe.generic.auto._
import io.circe.syntax.EncoderOps

/** The kind of mode to use for slicing.
*/
object SliceMode extends Enumeration {
type SliceModes = Value
val DataFlow, Usages = Value
/** Settings shared by every slicing configuration. Each member provides a default value that the concrete
  * configuration case classes override with their own constructor fields.
  */
sealed trait BaseConfig {
/** Input path; `cpg.bin` unless overridden. */
def inputPath: File = File("cpg.bin")

/** Output file for the slices; `slices` unless overridden. */
def outFile: File = File("slices")

/** Whether dummy types are enabled; `false` unless overridden. */
def dummyTypesEnabled: Boolean = false

/** Optional file name used to restrict slicing to a single file; `None` (no restriction) unless overridden. */
def fileFilter: Option[String] = None
}

case class SliceConfig(
inputPath: File = File("cpg.bin"),
outFile: File = File("slices"),
sliceMode: SliceModes = SliceMode.DataFlow,
sourceFile: Option[String] = None,
sliceDepth: Int = 20,
override val inputPath: File = File("cpg.bin"),
override val outFile: File = File("slices"),
override val dummyTypesEnabled: Boolean = false,
override val fileFilter: Option[String] = None
) extends BaseConfig

/** Configuration for data-flow slicing: the shared [[BaseConfig]] settings plus the slice depth.
  *
  * @param inputPath
  *   input path; defaults to `cpg.bin`.
  * @param outFile
  *   output file for the slices; defaults to `slices`.
  * @param dummyTypesEnabled
  *   whether dummy types are enabled; defaults to `false`.
  * @param fileFilter
  *   optional file name used to restrict slicing to a single file; defaults to `None`.
  * @param sliceDepth
  *   depth setting for the data-flow slice; defaults to 20.
  */
case class DataFlowConfig(
override val inputPath: File = File("cpg.bin"),
override val outFile: File = File("slices"),
override val dummyTypesEnabled: Boolean = false,
override val fileFilter: Option[String] = None,
sliceDepth: Int = 20
) extends BaseConfig

case class UsagesConfig(
override val inputPath: File = File("cpg.bin"),
override val outFile: File = File("slices"),
override val dummyTypesEnabled: Boolean = false,
override val fileFilter: Option[String] = None,
minNumCalls: Int = 1,
typeRecoveryDummyTypes: Boolean = false,
excludeOperatorCalls: Boolean = false
)
) extends BaseConfig

/** A trait for all objects that represent a 1:1 relationship between the CPG and all the slices extracted.
*/
Expand Down
176 changes: 107 additions & 69 deletions joern-cli/src/main/scala/io/joern/joerncli/JoernSlice.scala
Original file line number Diff line number Diff line change
Expand Up @@ -13,41 +13,113 @@ import scala.util.Using

object JoernSlice {

import io.joern.dataflowengineoss.slicing.SliceMode._
import io.joern.dataflowengineoss.slicing._

implicit val sliceModeRead: scopt.Read[SliceModes] =
scopt.Read.reads(SliceMode withName)

case class Config(
inputPath: File = File("cpg.bin"),
outFile: File = File("slices"),
sliceMode: SliceModes = DataFlow,
sourceFile: Option[String] = None,
sliceDepth: Int = 20,
minNumCalls: Int = 1,
typeRecoveryDummyTypes: Boolean = false,
excludeOperatorCalls: Boolean = false
)
/** Command-line parser for `joern-slice`.
  *
  * Options shared by every mode (`cpg`, `--out`, `--dummy-types`, `--file-filter`) are declared at the top level. The
  * `data-flow` and `usages` commands replace the running configuration with a [[DataFlowConfig]] or [[UsagesConfig]]
  * and contribute their mode-specific options as children.
  */
private val configParser = new scopt.OptionParser[BaseConfig]("joern-slice") {
  head("Extract intra-procedural slices from the CPG.")
  help("help")
  arg[String]("cpg")
    .text("input CPG file name - defaults to `cpg.bin`")
    .optional()
    // A `failure` only stops parsing when it is returned from `validate`; calling it inside `action` discards the
    // result. Directories are accepted because `main` generates a temporary CPG for directory inputs.
    .validate { x =>
      val path = File(x)
      if (path.isRegularFile || path.isDirectory) success
      else failure(s"Path '$x' does not exist as a file or directory.")
    }
    .action { (x, c) =>
      val path = File(x)
      c match {
        case conf: SliceConfig    => conf.copy(inputPath = path)
        case conf: DataFlowConfig => conf.copy(inputPath = path)
        case conf: UsagesConfig   => conf.copy(inputPath = path)
        case _                    => SliceConfig(inputPath = path)
      }
    }
  opt[String]('o', "out")
    .text("the output file to write slices to - defaults to `slices`. The file is suffixed based on the mode.")
    .action((x, c) =>
      c match {
        case conf: SliceConfig    => conf.copy(outFile = File(x))
        case conf: DataFlowConfig => conf.copy(outFile = File(x))
        case conf: UsagesConfig   => conf.copy(outFile = File(x))
        case _                    => SliceConfig(outFile = File(x))
      }
    )
  opt[Unit]("dummy-types")
    .text("for generating CPGs that use type recovery, enables the use of dummy types - defaults to false.")
    .action((_, c) =>
      c match {
        case conf: SliceConfig    => conf.copy(dummyTypesEnabled = true)
        case conf: DataFlowConfig => conf.copy(dummyTypesEnabled = true)
        case conf: UsagesConfig   => conf.copy(dummyTypesEnabled = true)
        case _                    => SliceConfig(dummyTypesEnabled = true)
      }
    )
  opt[String]("file-filter")
    .text("the name of the source file to generate slices from.")
    .action((x, c) =>
      c match {
        case conf: SliceConfig    => conf.copy(fileFilter = Option(x))
        case conf: DataFlowConfig => conf.copy(fileFilter = Option(x))
        case conf: UsagesConfig   => conf.copy(fileFilter = Option(x))
        case _                    => SliceConfig(fileFilter = Option(x))
      }
    )
  cmd("data-flow")
    // Carry over every shared setting, including `fileFilter`, so that options given before the command survive
    // the switch to the mode-specific configuration.
    .action((_, c) =>
      DataFlowConfig(
        inputPath = c.inputPath,
        outFile = c.outFile,
        dummyTypesEnabled = c.dummyTypesEnabled,
        fileFilter = c.fileFilter
      )
    )
    .children(
      opt[Int]("slice-depth")
        .text("the max depth to traverse the DDG for the data-flow slice - defaults to 20.")
        .action((x, c) =>
          c match {
            case conf: DataFlowConfig => conf.copy(sliceDepth = x)
            case _                    => c
          }
        )
    )
  cmd("usages")
    // As above: keep `fileFilter` along with the other shared settings.
    .action((_, c) =>
      UsagesConfig(
        inputPath = c.inputPath,
        outFile = c.outFile,
        dummyTypesEnabled = c.dummyTypesEnabled,
        fileFilter = c.fileFilter
      )
    )
    .children(
      opt[Int]("min-num-calls")
        .text("the minimum number of calls required for a usage slice - defaults to 1.")
        .action((x, c) =>
          c match {
            case conf: UsagesConfig => conf.copy(minNumCalls = x)
            case _                  => c
          }
        ),
      opt[Unit]("exclude-operators")
        .text("excludes operator calls in the slices - defaults to false.")
        .action((_, c) =>
          c match {
            case conf: UsagesConfig => conf.copy(excludeOperatorCalls = true)
            case _                  => c
          }
        )
    )
}

def main(args: Array[String]): Unit = {
parseConfig(args).foreach { config =>
val inputCpgPath =
if (
config.inputPath.isDirectory || !config.inputPath
.extension(includeDot = false)
.exists(_.matches("(bin|cpg)"))
)
generateTempCpg(config)
else config.inputPath.pathAsString
Using.resource(CpgBasedTool.loadFromOdb(inputCpgPath)) { cpg =>
checkAndApplyOverlays(cpg)
// Slice the CPG
val slice: ProgramSlice = config.sliceMode match {
case DataFlow => DataFlowSlicing.calculateDataFlowSlice(cpg, config)
case Usages => UsageSlicing.calculateUsageSlice(cpg, config)
if (config.isInstanceOf[SliceConfig]) {
configParser.reportError("No command specified! Use --help for more information.")
} else {
val inputCpgPath =
if (
config.inputPath.isDirectory || !config.inputPath
.extension(includeDot = false)
.exists(_.matches("(bin|cpg)"))
)
generateTempCpg(config)
else config.inputPath.pathAsString
Using.resource(CpgBasedTool.loadFromOdb(inputCpgPath)) { cpg =>
checkAndApplyOverlays(cpg)
// Slice the CPG
(config match {
case x: DataFlowConfig => Option(DataFlowSlicing.calculateDataFlowSlice(cpg, x))
case x: UsagesConfig => Option(UsageSlicing.calculateUsageSlice(cpg, x))
case _ => None
}) match {
case Some(programSlice: ProgramSlice) => saveSlice(config.outFile, programSlice)
case None =>
}
}
saveSlice(config.outFile, slice)
}
}
}
Expand All @@ -57,22 +129,22 @@ object JoernSlice {
private def checkAndApplyOverlays(cpg: Cpg): Unit = {
import io.shiftleft.semanticcpg.language._

if (!cpg.metaData.overlays.exists(_ == Base.overlayName)) {
if (!cpg.metaData.overlays.contains(Base.overlayName)) {
println("Default overlays are not detected, applying defaults now")
X2Cpg.applyDefaultOverlays(cpg)
}
if (!cpg.metaData.overlays.exists(_ == OssDataFlow.overlayName)) {
if (!cpg.metaData.overlays.contains(OssDataFlow.overlayName)) {
println("Data-flow overlay is not detected, applying now")
new OssDataFlow(new OssDataFlowOptions()).run(new LayerCreatorContext(cpg))
}
}

private def generateTempCpg(config: SliceConfig): String = {
private def generateTempCpg(config: BaseConfig): String = {
val tmpFile = File.newTemporaryFile("joern-slice", ".bin")
println(s"Generating CPG from code at ${config.inputPath.pathAsString}")
(JoernParse.run(
ParserConfig(config.inputPath.pathAsString, outputCpgFile = tmpFile.pathAsString),
if (config.typeRecoveryDummyTypes) List.empty else List("--no-dummyTypes")
if (config.dummyTypesEnabled) List.empty else List("--no-dummyTypes")
) match {
case Right(_) =>
println(s"Temporary CPG has been successfully generated at ${tmpFile.pathAsString}")
Expand All @@ -84,42 +156,8 @@ object JoernSlice {
}
}

private def parseConfig(args: Array[String]): Option[SliceConfig] =
new scopt.OptionParser[SliceConfig]("joern-slice") {
head("Extract intra-procedural slices from the CPG.")
help("help")
arg[String]("cpg")
.text("input CPG file name - defaults to `cpg.bin`")
.optional()
.action { (x, c) =>
val path = File(x)
if (!path.isRegularFile) failure(s"File at '$x' not found or not regular, e.g. a directory.")
c.copy(inputPath = path)
}
opt[String]('o', "out")
.text("the output file to write slices to - defaults to `slices`. The file is suffixed based on the mode.")
.action((x, c) => c.copy(outFile = File(x)))
opt[SliceModes]('m', "mode")
.text(s"the kind of slicing to perform - defaults to `DataFlow`. Options: [${SliceMode.values.mkString(", ")}]")
.action((x, c) => c.copy(sliceMode = x))
opt[String]("source-file")
.text("the name of the source file to generate slices from.")
.optional()
.action((x, c) => c.copy(sourceFile = Some(x)))
opt[Int]("slice-depth")
.text(s"the max depth to traverse the DDG for the data-flow slice (for `DataFlow` mode) - defaults to 20.")
.action((x, c) => c.copy(minNumCalls = x))
opt[Int]("min-num-calls")
.text(s"the minimum number of calls required for a usage slice (for `Usage` mode) - defaults to 1.")
.action((x, c) => c.copy(minNumCalls = x))
opt[Boolean]("dummy-types")
.text(s"for generating CPGs that use type recovery, enables the use of dummy types - defaults to false.")
.action((x, c) => c.copy(typeRecoveryDummyTypes = x))
opt[Boolean]("exclude-operators")
.text(s"excludes operator calls in the slices - defaults to false.")
.action((x, c) => c.copy(excludeOperatorCalls = x))

}.parse(args, SliceConfig())
/** Runs `configParser` over the command-line arguments, starting from a default [[SliceConfig]].
  *
  * @param args
  *   the raw command-line arguments.
  * @return
  *   the `Option` produced by the parser.
  */
private def parseConfig(args: Array[String]): Option[BaseConfig] = {
  val defaults: BaseConfig = SliceConfig()
  configParser.parse(args, defaults)
}

private def saveSlice(outFile: File, programSlice: ProgramSlice): Unit = {

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package io.joern.joerncli

import better.files.File
import io.joern.dataflowengineoss.slicing.{SliceConfig, DefComponent, ProgramUsageSlice, UsageSlicing}
import io.joern.dataflowengineoss.slicing._
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.codepropertygraph.generated.{Languages, Operators}
import org.scalatest.matchers.should.Matchers
Expand All @@ -15,7 +15,7 @@ class JoernSliceTests extends AnyWordSpec with Matchers with AbstractJoernCliTes
) { case (cpg: Cpg, _) =>
val programSlice =
UsageSlicing
.calculateUsageSlice(cpg, SliceConfig(excludeOperatorCalls = true))
.calculateUsageSlice(cpg, UsagesConfig(excludeOperatorCalls = true))
.asInstanceOf[ProgramUsageSlice]

"extract 'express.js' slice" in {
Expand Down Expand Up @@ -78,7 +78,7 @@ class JoernSliceTests extends AnyWordSpec with Matchers with AbstractJoernCliTes
) { case (cpg: Cpg, _) =>
val programSlice =
UsageSlicing
.calculateUsageSlice(cpg, SliceConfig())
.calculateUsageSlice(cpg, UsagesConfig())
.asInstanceOf[ProgramUsageSlice]

"extract 'name' parameter slice from 'startScene'" in {
Expand Down

0 comments on commit ce33885

Please sign in to comment.