Skip to content

Commit

Permalink
[gosrc2cpg] Partial gosrc2cpg frontend performance optimisations (#4668)
Browse files Browse the repository at this point in the history
* Partial gosrc2cpg frontend performance optimisations

1. Changed parsing of AST json inside AST Creator itself.
2. Made changes in download dependency processing to generate AST only
for the used packages and subsequently only processing used packages.
There was a change required inside `goastgen` utility to support the
`-include` input option.

TODO:
At the moment, `AstCreator` is instantiated during the first-level
processing that builds the cache, and it stays in memory until
`AstCreationPass` is done. In subsequent changes, we will split the
processing so that an `AstCreator` is created for that pass and
destroyed once it has been used, then instantiated again for `AstCreationPass`.

The above change is done as part of the larger change.

* small pending change

* Refactored some code to use meaningful name in the context
  • Loading branch information
pandurangpatil committed Jun 18, 2024
1 parent 80b0938 commit 53c2c77
Show file tree
Hide file tree
Showing 11 changed files with 122 additions and 86 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ package io.joern.csharpsrc2cpg.utils
import better.files.File
import io.joern.csharpsrc2cpg.Config
import io.joern.x2cpg.SourceFiles
import io.joern.x2cpg.astgen.AstGenRunner.{AstGenProgramMetaData, DefaultAstGenRunnerResult, getClass}
import io.joern.x2cpg.astgen.AstGenRunner.{AstGenProgramMetaData, getClass}
import io.joern.x2cpg.astgen.AstGenRunnerBase
import io.joern.x2cpg.utils.ExternalCommand
import org.slf4j.LoggerFactory

import scala.collection.mutable
import scala.util.{Failure, Success, Try}
import scala.util.Try

class DotNetAstGenRunner(config: Config) extends AstGenRunnerBase(config) {

Expand Down Expand Up @@ -60,7 +60,7 @@ class DotNetAstGenRunner(config: Config) extends AstGenRunnerBase(config) {
}.toList
}

override def runAstGenNative(in: String, out: File, exclude: String)(implicit
override def runAstGenNative(in: String, out: File, exclude: String, include: String)(implicit
metaData: AstGenProgramMetaData
): Try[Seq[String]] = {
val excludeCommand = if (exclude.isEmpty) "" else s"-e \"$exclude\""
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
gosrc2cpg {
goastgen_version: "0.14.0"
goastgen_version: "0.15.0"
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@ import io.joern.x2cpg.X2Cpg.withNewEmptyCpg
import io.joern.x2cpg.X2CpgFrontend
import io.joern.x2cpg.passes.frontend.MetaDataPass
import io.joern.x2cpg.utils.Report
import io.shiftleft.codepropertygraph.generated.Cpg
import io.shiftleft.codepropertygraph.generated.Languages
import io.shiftleft.codepropertygraph.generated.{Cpg, Languages}

import java.nio.file.Paths
import scala.util.Try
Expand All @@ -43,7 +42,14 @@ class GoSrc2Cpg(goGlobalOption: Option[GoGlobal] = Option(GoGlobal())) extends X
)
goGlobal.mainModule = goMod.flatMap(modHelper => modHelper.getModMetaData().map(mod => mod.module.name))
val astCreators =
new MethodAndTypeCacheBuilderPass(Some(cpg), astGenResult.parsedFiles, config, goMod.get, goGlobal)
new MethodAndTypeCacheBuilderPass(
Some(cpg),
astGenResult.parsedFiles,
config,
goMod.get,
goGlobal,
tmpDir
)
.process()
if (config.fetchDependencies) {
goGlobal.processingDependencies = true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,24 @@ package io.joern.gosrc2cpg.astcreation
import io.joern.gosrc2cpg.datastructures.GoGlobal
import io.joern.gosrc2cpg.model.GoModHelper
import io.joern.gosrc2cpg.parser.ParserAst.*
import io.joern.gosrc2cpg.parser.{ParserKeys, ParserNodeInfo}
import io.joern.gosrc2cpg.parser.{GoAstJsonParser, ParserKeys, ParserNodeInfo}
import io.joern.x2cpg.astgen.{AstGenNodeBuilder, ParserResult}
import io.joern.x2cpg.datastructures.Scope
import io.joern.x2cpg.datastructures.Stack.*
import io.joern.x2cpg.utils.NodeBuilders.newModifierNode
import io.joern.x2cpg.{Ast, AstCreatorBase, ValidationMode, AstNodeBuilder as X2CpgAstNodeBuilder}
import io.joern.x2cpg.{Ast, AstCreatorBase, ValidationMode}
import io.shiftleft.codepropertygraph.generated.nodes.NewNode
import io.shiftleft.codepropertygraph.generated.{ModifierTypes, NodeTypes}
import org.slf4j.{Logger, LoggerFactory}
import overflowdb.BatchedUpdate.DiffGraphBuilder
import ujson.Value

import java.nio.file.Paths
import scala.collection.mutable

class AstCreator(
val jsonAstFilePath: String,
val relPathFileName: String,
val parserResult: ParserResult,
val goMod: GoModHelper,
val goGlobal: GoGlobal
)(implicit withSchemaValidation: ValidationMode)
Expand All @@ -36,9 +37,10 @@ class AstCreator(
with AstForLambdaCreator
with AstGenNodeBuilder[AstCreator] {

protected val logger: Logger = LoggerFactory.getLogger(classOf[AstCreator])
protected val methodAstParentStack: Stack[NewNode] = new Stack()
protected val scope: Scope[String, (NewNode, String), NewNode] = new Scope()
protected val logger: Logger = LoggerFactory.getLogger(classOf[AstCreator])
val parserResult = GoAstJsonParser.readFile(Paths.get(jsonAstFilePath))
protected val methodAstParentStack: Stack[NewNode] = new Stack()
protected val scope: Scope[String, (NewNode, String), NewNode] = new Scope()
protected val aliasToNameSpaceMapping: mutable.Map[String, String] = mutable.Map.empty
protected val lineNumberMapping: Map[Int, String] = positionLookupTables(parserResult.fileContent)
protected val declaredPackageName = parserResult.json(ParserKeys.Name)(ParserKeys.Name).str
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
package io.joern.gosrc2cpg.astcreation

import io.joern.gosrc2cpg.datastructures.GoGlobal
import io.joern.gosrc2cpg.parser.ParserAst.*
import io.joern.gosrc2cpg.parser.{ParserAst, ParserKeys, ParserNodeInfo}
import io.joern.x2cpg.utils.NodeBuilders.newModifierNode
import io.joern.x2cpg.{Ast, Defines as XDefines}
import io.shiftleft.codepropertygraph.generated.nodes.{NewModifier, NewNode}
import io.shiftleft.codepropertygraph.generated.{EvaluationStrategies, ModifierTypes, PropertyNames}
import org.apache.commons.lang3.StringUtils
import ujson.Value

import scala.collection.mutable
Expand Down Expand Up @@ -122,13 +120,20 @@ trait AstCreatorHelper { this: AstCreator =>
protected def columnEndNo(node: Value): Option[Integer] = Try(node(ParserKeys.NodeColEndNo).num).toOption.map(_.toInt)

protected def positionLookupTables(source: String): Map[Int, String] = {
source
.split("\n")
.zipWithIndex
.map { case (sourceLine, lineNumber) =>
(lineNumber + 1, sourceLine)
}
.toMap
val result = if (!goGlobal.processingDependencies) {
val map = parserResult.fileContent
.split("\n")
.zipWithIndex
.map { case (sourceLine, lineNumber) =>
(lineNumber + 1, sourceLine)
}
.toMap
map
} else {
Map[Int, String]()
}
parserResult.fileContent = ""
result
}

protected def resolveAliasToFullName(alias: String): String = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class GoModHelper(config: Option[Config] = None, meta: Option[GoMod] = None) {
for (dependency <- mod.dependencies) {
if (importStmt.startsWith(dependency.module)) {
dependency.beingUsed = true
dependency.usedPackages.add(importStmt)
dependency.usedPackages.add(importStmt.replace(dependency.module, ""))
}
}
}
Expand Down Expand Up @@ -129,7 +129,10 @@ case class GoModDependency(
endLineNo: Option[Int] = None,
endColNo: Option[Int] = None,
usedPackages: util.Set[String] = new ConcurrentSkipListSet[String]()
)
) {
/** Joins every entry of `usedPackages` into a single alternation group of the form `(a|b|c)`, following the set's
  * iteration order. Entries are inserted verbatim (no regex escaping); an empty set yields `()`.
  */
def getIncludePackageRegex(): String = "(" + String.join("|", usedPackages) + ")"
/** Returns this dependency's identifier in `module@version` form. */
def dependencyStr(): String = Seq(module, version).mkString("@")
}

implicit val javaSetRw: ReadWriter[util.Set[String]] = {
import scala.jdk.CollectionConverters.*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package io.joern.gosrc2cpg.passes
import better.files.File
import io.joern.gosrc2cpg.Config
import io.joern.gosrc2cpg.datastructures.GoGlobal
import io.joern.gosrc2cpg.model.GoModHelper
import io.joern.gosrc2cpg.model.{GoModDependency, GoModHelper}
import io.joern.gosrc2cpg.parser.GoAstJsonParser
import io.joern.gosrc2cpg.utils.AstGenRunner
import io.joern.gosrc2cpg.utils.AstGenRunner.{GoAstGenRunnerResult, getClass}
Expand All @@ -12,65 +12,85 @@ import org.slf4j.LoggerFactory

import java.io.File as JFile
import java.nio.file.Paths
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.Duration
import scala.concurrent.{Await, Future}
import java.util.concurrent.LinkedBlockingQueue
import scala.util.{Failure, Success, Try}

class DownloadDependenciesPass(parentGoMod: GoModHelper, goGlobal: GoGlobal, config: Config) {
private val logger = LoggerFactory.getLogger(getClass)
def process(): Unit = {
val processor = new DependencyProcessorQueue()
val processorThread = new Thread(processor)
processorThread.start()
File.usingTemporaryDirectory("go-temp-download") { tmpDir =>
setupDummyProjectAndDownload(tmpDir.toString)
}
}

private def setupDummyProjectAndDownload(prjDir: String): Unit = {
parentGoMod
.getModMetaData()
.foreach(mod => {
ExternalCommand.run("go mod init joern.io/temp", prjDir) match {
case Success(_) =>
val futures = mod.dependencies
.filter(dep => dep.beingUsed)
.map(dependency => {
Future {
val dependencyStr = s"${dependency.module}@${dependency.version}"
val cmd = s"go get $dependencyStr"
val results = synchronized(ExternalCommand.run(cmd, prjDir))
val projDir = tmpDir.pathAsString
parentGoMod
.getModMetaData()
.foreach(mod => {
ExternalCommand.run("go mod init joern.io/temp", projDir) match {
case Success(_) =>
mod.dependencies
.filter(dep => dep.beingUsed)
.map(dependency => {
val cmd = s"go get ${dependency.dependencyStr()}"
val results = ExternalCommand.run(cmd, projDir)
results match {
case Success(_) =>
print(". ")
processDependency(dependencyStr)
processor.queue.put(Some(dependency))
case Failure(f) =>
logger.error(s"\t- command '$cmd' failed", f)
}
}
})
val allResults: Future[List[Unit]] = Future.sequence(futures)
Await.result(allResults, Duration.Inf)
case Failure(f) =>
logger.error("\t- command 'go mod init joern.io/temp' failed", f)
}
})
})
case Failure(f) =>
logger.error("\t- command 'go mod init joern.io/temp' failed", f)
}
})
}
processor.queue.put(None)
processorThread.join()
}

private def processDependency(dependencyStr: String): Unit = {
val gopath = Try(sys.env("GOPATH")).getOrElse(Seq(os.home, "go").mkString(JFile.separator))
val dependencyLocation = (Seq(gopath, "pkg", "mod") ++ dependencyStr.split("/")).mkString(JFile.separator)
File.usingTemporaryDirectory("godep") { astLocation =>
val depConfig = Config()
.withInputPath(dependencyLocation)
.withIgnoredFilesRegex(config.ignoredFilesRegex.toString())
.withIgnoredFiles(config.ignoredFiles.toList)
// TODO: Need to implement mechanism to filter and process only used namespaces(folders) of the dependency.
// In order to achieve this filtering, we need to add support for inclusive rule with goastgen utility first.
val astGenResult = new AstGenRunner(depConfig).execute(astLocation).asInstanceOf[GoAstGenRunnerResult]
val goMod = new GoModHelper(
Some(depConfig),
astGenResult.parsedModFile.flatMap(modFile => GoAstJsonParser.readModFile(Paths.get(modFile)).map(x => x))
)
new MethodAndTypeCacheBuilderPass(None, astGenResult.parsedFiles, depConfig, goMod, goGlobal).process()
/** Consumer that processes downloaded dependencies one at a time, taking them from [[queue]].
  *
  * Producers enqueue `Some(dependency)` for every dependency to process and a final `None` to request shutdown.
  * A failure while processing one dependency is logged and does not prevent the remaining queued dependencies from
  * being processed.
  */
private class DependencyProcessorQueue extends Runnable {
  import scala.util.control.NonFatal

  /** Work queue; a `None` element tells [[run]] to stop. */
  val queue =
    new LinkedBlockingQueue[Option[GoModDependency]]()

  /** Blocks on [[queue]] and processes each dependency until `None` is received or the thread is interrupted. */
  override def run(): Unit = {
    try {
      var terminate = false
      while (!terminate) {
        queue.take() match {
          case None =>
            logger.debug("Shutting down WriterThread")
            terminate = true
          case Some(dependency) =>
            // Isolate failures per dependency: without this, the first exception would leave the loop and every
            // dependency still waiting in the queue would be silently dropped.
            // NonFatal does not match InterruptedException, so interruption still reaches the outer handler.
            try {
              processDependency(dependency)
            } catch {
              case NonFatal(exc) =>
                logger.error(s"error while processing dependency '${dependency.dependencyStr()}'", exc)
            }
        }
      }
    } catch {
      case exception: InterruptedException =>
        logger.warn("Interrupted WriterThread", exception)
        // Catching InterruptedException clears the flag; set it again so the interruption stays observable.
        Thread.currentThread().interrupt()
      case exc: Exception =>
        logger.error("error in writer thread, ", exc)
    }
  }

  /** Runs goastgen over the downloaded sources of `dependency` and feeds the generated AST files to
    * `MethodAndTypeCacheBuilderPass` (with no CPG).
    *
    * The sources are looked up under `<GOPATH>/pkg/mod/<module@version>`, where `<GOPATH>` falls back to
    * `<home>/go` when the environment variable is not set. The AST output goes to a temporary directory that only
    * exists for the duration of this call.
    *
    * @param dependency
    *   the dependency to process; its `getIncludePackageRegex()` is handed to `AstGenRunner` as the include filter
    */
  private def processDependency(dependency: GoModDependency): Unit = {
    val gopath = sys.env.getOrElse("GOPATH", Seq(os.home, "go").mkString(JFile.separator))
    val dependencyLocation =
      (Seq(gopath, "pkg", "mod") ++ dependency.dependencyStr().split("/")).mkString(JFile.separator)
    File.usingTemporaryDirectory("godep") { astLocation =>
      val depConfig = Config()
        .withInputPath(dependencyLocation)
        .withIgnoredFilesRegex(config.ignoredFilesRegex.toString())
        .withIgnoredFiles(config.ignoredFiles.toList)
      new AstGenRunner(depConfig, dependency.getIncludePackageRegex()).execute(astLocation) match {
        case astGenResult: GoAstGenRunnerResult =>
          val goMod = new GoModHelper(
            Some(depConfig),
            astGenResult.parsedModFile.flatMap(modFile => GoAstJsonParser.readModFile(Paths.get(modFile)).map(x => x))
          )
          new MethodAndTypeCacheBuilderPass(None, astGenResult.parsedFiles, depConfig, goMod, goGlobal, astLocation)
            .process()
        case other =>
          // Previously an unchecked cast; report the unexpected result instead of failing with ClassCastException.
          logger.warn(
            s"Unexpected goastgen result type '${other.getClass.getName}' for dependency '${dependency.dependencyStr()}'"
          )
      }
    }
  }
}
}
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
package io.joern.gosrc2cpg.passes

import better.files.File
import io.joern.gosrc2cpg.Config
import io.joern.gosrc2cpg.astcreation.AstCreator
import io.joern.gosrc2cpg.datastructures.GoGlobal
import io.joern.gosrc2cpg.model.GoModHelper
import io.joern.gosrc2cpg.parser.GoAstJsonParser
import io.joern.x2cpg.SourceFiles
import io.shiftleft.codepropertygraph.generated.Cpg
import io.shiftleft.codepropertygraph.generated.DiffGraphBuilder
import io.shiftleft.codepropertygraph.generated.{Cpg, DiffGraphBuilder}

import java.nio.file.Paths
import scala.concurrent.ExecutionContext.Implicits.global
Expand All @@ -19,15 +19,15 @@ class MethodAndTypeCacheBuilderPass(
astFiles: List[String],
config: Config,
goMod: GoModHelper,
goGlobal: GoGlobal
goGlobal: GoGlobal,
tmpDir: File
) {
def process(): Seq[AstCreator] = {
val futures = astFiles
.map(file =>
Future {
val parserResult = GoAstJsonParser.readFile(Paths.get(file))
val relPathFileName = SourceFiles.toRelativePath(parserResult.fullPath, config.inputPath)
val astCreator = new AstCreator(relPathFileName, parserResult, goMod, goGlobal)(config.schemaValidation)
val relPathFileName = SourceFiles.toRelativePath(file, tmpDir.pathAsString).replace(".json", "")
val astCreator = new AstCreator(file, relPathFileName, goMod, goGlobal)(config.schemaValidation)
val diffGraph = astCreator.buildCache(cpgOpt)
(astCreator, diffGraph)
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package io.joern.gosrc2cpg.utils

import better.files.File
import com.typesafe.config.ConfigFactory
import io.joern.gosrc2cpg.Config
import io.joern.x2cpg.SourceFiles
import io.joern.x2cpg.astgen.AstGenRunner.{AstGenProgramMetaData, AstGenRunnerResult}
Expand All @@ -10,9 +9,8 @@ import io.joern.x2cpg.utils.Environment.ArchitectureType.ArchitectureType
import io.joern.x2cpg.utils.Environment.OperatingSystemType.OperatingSystemType
import io.joern.x2cpg.utils.{Environment, ExternalCommand}
import org.slf4j.LoggerFactory
import versionsort.VersionHelper

import java.nio.file.Paths
import scala.util.matching.Regex
import scala.util.{Failure, Success, Try}

object AstGenRunner {
Expand All @@ -24,7 +22,7 @@ object AstGenRunner {
) extends AstGenRunnerResult
}

class AstGenRunner(config: Config) extends AstGenRunnerBase(config) {
class AstGenRunner(config: Config, includeFileRegex: String = "") extends AstGenRunnerBase(config) {
import io.joern.gosrc2cpg.utils.AstGenRunner.*

override val WinX86 = "windows.exe"
Expand Down Expand Up @@ -70,18 +68,19 @@ class AstGenRunner(config: Config) extends AstGenRunnerBase(config) {
}
}

override def runAstGenNative(in: String, out: File, exclude: String)(implicit
override def runAstGenNative(in: String, out: File, exclude: String, include: String)(implicit
metaData: AstGenProgramMetaData
): Try[Seq[String]] = {
val excludeCommand = if (exclude.isEmpty) "" else s"-exclude \"$exclude\""
ExternalCommand.run(s"$astGenCommand $excludeCommand -out ${out.toString()} $in", ".")
val includeCommand = if (include.isEmpty) "" else s"-include \"$include\""
ExternalCommand.run(s"$astGenCommand $excludeCommand $includeCommand -out ${out.toString()} $in", ".")
}

override def execute(out: File): AstGenRunnerResult = {
implicit val metaData: AstGenProgramMetaData = config.astGenMetaData
val in = File(config.inputPath)
logger.info(s"Running goastgen in '$config.inputPath' ...")
runAstGenNative(config.inputPath, out, config.ignoredFilesRegex.toString()) match {
runAstGenNative(config.inputPath, out, config.ignoredFilesRegex.toString(), includeFileRegex.toString()) match {
case Success(result) =>
val srcFiles = SourceFiles.determine(
out.toString(),
Expand Down
Loading

0 comments on commit 53c2c77

Please sign in to comment.