Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Add config to TestCpg and filter php files #2733

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ import scopt.OParser
final case class Config(
inputPath: String = "",
outputPath: String = X2CpgConfig.defaultOutputPath,
phpIni: Option[String] = None
phpIni: Option[String] = None,
excludeOverrides: Option[List[String]] = None
) extends X2CpgConfig[Config] {

override def withInputPath(inputPath: String): Config =
Expand All @@ -28,7 +29,11 @@ private object Frontend {
programName("php2cpg"),
opt[String]("php-ini")
.action((x, c) => c.copy(phpIni = Some(x)))
.text("php.ini path used by php-parser. Defaults to php.ini shipped with Joern.")
.text("php.ini path used by php-parser. Defaults to php.ini shipped with Joern."),
opt[Seq[String]]("exclude-overrides")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we split this in two? I think from an end-user's perspective they either want to disable the default exclusions or add additional exclusions, but rarely both at the same time.

.valueName("regex1,regex2,...")
.action((x, c) => c.copy(excludeOverrides = Some(x.toList)))
.text("Comma separated list of regexes where matches exclude files or directories. Overrides default list.")
)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,40 @@ import better.files.File
import io.joern.php2cpg.Config
import io.joern.php2cpg.astcreation.AstCreator
import io.joern.php2cpg.parser.PhpParser
import io.joern.php2cpg.utils.PathFilter
import io.joern.x2cpg.SourceFiles
import io.joern.x2cpg.datastructures.Global
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.passes.ConcurrentWriterCpgPass
import org.slf4j.LoggerFactory

import scala.jdk.CollectionConverters._
import java.nio.file.PathMatcher

class AstCreationPass(config: Config, cpg: Cpg) extends ConcurrentWriterCpgPass[String](cpg) {

private val logger = LoggerFactory.getLogger(this.getClass)
val global = new Global()

val PhpSourceFileExtensions: Set[String] = Set(".php")
private val pathFilter = PathFilter(config.excludeOverrides)

override def generateParts(): Array[String] = SourceFiles.determine(config.inputPath, PhpSourceFileExtensions).toArray
override def generateParts(): Array[String] =
SourceFiles.determine(config.inputPath, PhpSourceFileExtensions).toArray

override def runOnPart(diffGraph: DiffGraphBuilder, filename: String): Unit = {
val relativeFilename = File(config.inputPath).relativize(File(filename)).toString
PhpParser.parseFile(filename, config.phpIni) match {
case Some(parseResult) =>
diffGraph.absorb(new AstCreator(relativeFilename, parseResult, global).createAst())

case None =>
logger.warn(s"Could not parse file $filename. Results will be missing!")
if (pathFilter.excluded(relativeFilename)) {
logger.debug(s"Skipping file due to matched exclusion: $relativeFilename")
} else {
PhpParser.parseFile(filename, config.phpIni) match {
case Some(parseResult) =>
diffGraph.absorb(new AstCreator(relativeFilename, parseResult, global).createAst())

case None =>
logger.warn(s"Could not parse file $filename. Results will be missing!")
}
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package io.joern.php2cpg.utils

import io.joern.php2cpg.utils.PathFilter.standardisePath
import org.slf4j.LoggerFactory

import java.io.File
import java.nio.file.Paths
import scala.util.matching.Regex

class PathFilter(excludeOverrides: Option[List[String]]) {
private val logger = LoggerFactory.getLogger(this.getClass)

private val sep = if (File.separator == "/") File.separator else "\\\\"
private val startOrSep = s"^([^$sep]+$sep)*"
private val anyDirSuffix = s"$sep.*"

def dirExclude(dirName: String): Regex = {
val path = escapeSeparators(standardisePath(dirName))
s"${startOrSep}$path${anyDirSuffix}".r
}

private def escapeSeparators(filename: String): String = {
if (File.separator == "/")
filename
else
filename.replaceAll(raw"\\", raw"\\\\")
}

private val excludeRegexes = {
excludeOverrides.getOrElse(PathFilter.DefaultExcludes).map(dirExclude)
}

def excluded(filename: String): Boolean = {
excludeRegexes.exists(_.matches(filename))
}
}

object PathFilter {
val DefaultExcludes: List[String] = List(
"tests",
// For composer projects
"vendor",
".git"
)

def standardisePath(filename: String): String = {
// Synthetic filename, so don't correct
if (filename.contains("<"))
filename
else
Paths.get(filename).toString
}

def apply(excludes: Option[List[String]]): PathFilter = {
new PathFilter(excludes)
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package io.joern.php2cpg.querying

import io.joern.php2cpg.testfixtures.PhpCode2CpgFixture
import io.joern.php2cpg.utils.PathFilter
import io.joern.x2cpg.testfixtures.TestCpg
import io.shiftleft.semanticcpg.language._
import io.joern.php2cpg.Config

class FileTests extends PhpCode2CpgFixture {
val testFiles = List(
"a.php",
"src/b.php",
"src/sub/c.php",
"tests/d.php",
"tests/sub/e.php",
"vendor/autoload.php",
"vendor/dep/f.php",
"misc/g.php",
".git/h.php",
"sub/tests/i.php",
"nottests/j.php"
)

private def getCpg(): TestCpg = {
val cpg = code("", fileName = testFiles.head)
testFiles.tail.foreach(path => cpg.moreCode("", fileName = PathFilter.standardisePath(path)))
cpg
}

"test, vendor, and .git directories should be filtered out by default" in {
val cpg = getCpg()

cpg.file.name.toSet shouldBe Set("a.php", "src/b.php", "src/sub/c.php", "misc/g.php", "nottests/j.php", "<unknown>")
.map(PathFilter.standardisePath)
}

"custom excludes should exclude the correct directories" in {
val config = Config(excludeOverrides = Some(List("dep", "src/sub/", "doesntexist")))
val cpg = getCpg().config(config)

cpg.file.name.toSet shouldBe Set(
"a.php",
"src/b.php",
"tests/d.php",
"tests/sub/e.php",
"vendor/autoload.php",
"misc/g.php",
".git/h.php",
"sub/tests/i.php",
"nottests/j.php",
"<unknown>"
).map(PathFilter.standardisePath)
}
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

with other frontends we've seen bugs with filtering for untested combinations of absolute and relative paths for a) the input path and b) the excluded path. Can you add some tests for here to ensure that works as intended in all cases?

Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.semanticcpg.language.{ICallResolver, NoResolve}

import java.io.File
import io.joern.x2cpg.X2CpgConfig

trait PhpFrontend extends LanguageFrontend {
override val fileSuffix: String = ".php"
Expand All @@ -15,6 +16,15 @@ trait PhpFrontend extends LanguageFrontend {
implicit val defaultConfig: Config = Config()
new Php2Cpg().createCpg(sourceCodeFile.getAbsolutePath).get
}

override def execute[T <: X2CpgConfig[_]](sourceCodeFile: File, config: T): Cpg = {
config match {
case phpConfig: Config =>
new Php2Cpg().createCpg(sourceCodeFile.getAbsolutePath)(phpConfig).get
case _ =>
throw new RuntimeException(s"Cannot invoke php2cpg with config type ${config.getClass().getCanonicalName()}")
}
}
}

class DefaultTestCpgWithPhp extends DefaultTestCpg with PhpFrontend
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ package io.joern.x2cpg.testfixtures
import io.shiftleft.codepropertygraph.Cpg

import java.io.File
import io.joern.x2cpg.X2CpgConfig
import scala.annotation.nowarn

/** LanguageFrontend encapsulates the logic that translates the source code directory into CPGs
*/
Expand All @@ -19,4 +21,16 @@ trait LanguageFrontend {
* CPG representation stored in a file
*/
def execute(sourceCodeFile: File): Cpg

/** Generate CPG for the given source directory with the option to override the method to pass config down.
* @param sourceCodeFile
* direcotry where source code is located
* @param config
* frontend config: ignored unless overridden
* @return
* CPG representation stored in a file
*/
def execute[T <: X2CpgConfig[_]](sourceCodeFile: File, @nowarn config: T): Cpg = {
execute(sourceCodeFile)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import java.nio.file.{Files, Path, Paths}
import java.util.Comparator
import scala.annotation.nowarn
import scala.collection.mutable
import io.joern.x2cpg.X2CpgConfig

// Lazily populated test CPG which is created upon first access to the underlying graph.
// The trait LanguageFrontend is mixed in and not property/field of this class in order
Expand All @@ -16,6 +17,7 @@ abstract class TestCpg extends Cpg() with LanguageFrontend {
private var _graph = Option.empty[Graph]
private val codeFileNamePairs = mutable.ArrayBuffer.empty[(String, Path)]
private var fileNameCounter = 0
private var _config = Option.empty[X2CpgConfig[_]]

@nowarn
protected def codeFilePreProcessing(codeFile: Path): Unit = {}
Expand All @@ -34,6 +36,22 @@ abstract class TestCpg extends Cpg() with LanguageFrontend {
this
}

def config[T <: X2CpgConfig[_]](newConfig: T): this.type = {
checkConfigEmpty()
this._config = Some(newConfig)
this
}

private def checkConfigEmpty(): Unit = {
val messages = List(
Option.when(_graph.isDefined)("Modifying test config is not allowed after accessing graph."),
Option.when(_config.isDefined)("Test config may only be added once")
).flatten
if (messages.nonEmpty) {
throw new RuntimeException(messages.mkString("\n"))
}
}

private def checkGraphEmpty(): Unit = {
if (_graph.isDefined) {
throw new RuntimeException("Modifying test data is not allowed after accessing graph.")
Expand Down Expand Up @@ -65,7 +83,9 @@ abstract class TestCpg extends Cpg() with LanguageFrontend {
if (_graph.isEmpty) {
val codeDir = codeToFileSystem()
try {
_graph = Option(execute(codeDir.toFile).graph)
_graph = _config
.map(execute(codeDir.toFile, _).graph)
.orElse(Option(execute(codeDir.toFile).graph))
applyPasses()
} finally {
deleteDir(codeDir)
Expand Down