Skip to content

Commit

Permalink
[ruby] Grouped facter Bug Fixes (#4585)
Browse files Browse the repository at this point in the history
The [`facter`](https://github.com/puppetlabs/facter) repository causes this frontend to throw a lot of warnings. This PR fixes much of the low-hanging fruit, and improves the warning techniques to help make the warnings more meaningful. There are still, however, quite a few syntax errors to handle from this repository.

* Handles regexes generated from string formating.
* Handled the `!~` regex not match function. Long term any unrecognized functions are interpreted as calls.
* Added more cases for implicit returns
  • Loading branch information
DavidBakerEffendi committed May 23, 2024
1 parent 0d142fa commit 29740f9
Show file tree
Hide file tree
Showing 10 changed files with 88 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ object RubySrc2Cpg {
ignoredFilesPath = Option(config.ignoredFiles)
)
.map { fileName => () =>
resourceManagedParser.parse(fileName) match {
resourceManagedParser.parse(File(config.inputPath), fileName) match {
case Failure(exception) => throw exception
case Success(ctx) => new AstCreator(fileName, ctx, projectRoot)(config.schemaValidation)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,7 @@ trait AstCreatorHelper(implicit withSchemaValidation: ValidationMode) { this: As
"|" -> Operators.or,
"^" -> Operators.xor,
"<<" -> Operators.shiftLeft,
">>" -> Operators.logicalShiftRight,
"=~" -> RubyOperators.regexpMatch
">>" -> Operators.logicalShiftRight
)

protected val AssignmentOperatorNames: Map[String, String] = Map(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ trait AstForExpressionsCreator(implicit withSchemaValidation: ValidationMode) {
case node: IfExpression => astForIfExpression(node)
case node: UnlessExpression => astForUnlessExpression(node)
case node: RescueExpression => astForRescueExpression(node)
case node: CaseExpression => blockAst(NewBlock(), astsForCaseExpression(node).toList)
case node: MandatoryParameter => astForMandatoryParameter(node)
case node: SplattingRubyNode => astForSplattingRubyNode(node)
case node: AnonymousTypeDeclaration => astForAnonymousTypeDeclaration(node)
Expand All @@ -52,7 +53,10 @@ trait AstForExpressionsCreator(implicit withSchemaValidation: ValidationMode) {
case node: BreakStatement => astForBreakStatement(node)
case node: StatementList => astForStatementList(node)
case node: DummyNode => Ast(node.node)
case _ => astForUnknown(node)
case node: Unknown => astForUnknown(node)
case x =>
logger.warn(s"Unhandled expression of type ${x.getClass.getSimpleName}")
astForUnknown(node)

protected def astForStaticLiteral(node: StaticLiteral): Ast = {
Ast(literalNode(node, code(node), node.typeFullName))
Expand Down Expand Up @@ -117,10 +121,8 @@ trait AstForExpressionsCreator(implicit withSchemaValidation: ValidationMode) {
protected def astForBinary(node: BinaryExpression): Ast = {
getBinaryOperatorName(node.op) match
case None =>
logger.warn(s"Unrecognized binary operator: ${code(node)} ($relativeFileName), skipping")
astForUnknown(node)
case Some("=~") =>
astForMemberCall(MemberCall(node.lhs, ".", "=~", List(node.rhs))(node.span))
logger.debug(s"Unrecognized binary operator: ${code(node)} ($relativeFileName), assuming method call")
astForMemberCall(MemberCall(node.lhs, ".", node.op, List(node.rhs))(node.span))
case Some(op) =>
val lhsAst = astForExpression(node.lhs)
val rhsAst = astForExpression(node.rhs)
Expand Down Expand Up @@ -639,7 +641,10 @@ trait AstForExpressionsCreator(implicit withSchemaValidation: ValidationMode) {
protected def astForUnknown(node: RubyNode): Ast = {
val className = node.getClass.getSimpleName
val text = code(node)
logger.warn(s"Could not represent expression: $text ($className) ($relativeFileName), skipping")
node match {
case _: Unknown => // Unknowns are syntax errors which are logged by the parser already
case _ => logger.warn(s"Could not represent expression: $text ($className) ($relativeFileName), skipping")
}
Ast(unknownNode(node, text))
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -268,8 +268,8 @@ trait AstForStatementsCreator(implicit withSchemaValidation: ValidationMode) { t
astsForStatement(transform(expr))
case node: MemberCallWithBlock => returnAstForRubyCall(node)
case node: SimpleCallWithBlock => returnAstForRubyCall(node)
case _: (LiteralExpr | BinaryExpression | UnaryExpression | SimpleIdentifier | IndexAccess | Association |
YieldExpr | RubyCall | RubyFieldIdentifier) =>
case _: (LiteralExpr | BinaryExpression | UnaryExpression | SimpleIdentifier | SelfIdentifier | IndexAccess |
Association | YieldExpr | RubyCall | RubyFieldIdentifier | HereDocNode | Unknown) =>
astForReturnStatement(ReturnExpression(List(node))(node.span)) :: Nil
case node: SingleAssignment =>
astForSingleAssignment(node) :: List(astForReturnStatement(ReturnExpression(List(node.lhs))(node.span)))
Expand All @@ -282,7 +282,7 @@ trait AstForStatementsCreator(implicit withSchemaValidation: ValidationMode) { t
case ret: ReturnExpression => astForReturnStatement(ret) :: Nil
case node: MethodDeclaration =>
(astForMethodDeclaration(node) :+ astForReturnMethodDeclarationSymbolName(node)).toList

case _: BreakStatement => astsForStatement(node).toList
case node =>
logger.warn(
s"Implicit return here not supported yet: ${node.text} (${node.getClass.getSimpleName}), only generating statement"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,13 @@ object AntlrContextHelpers {
sealed implicit class RegularExpressionLiteralContextHelper(ctx: RegularExpressionLiteralContext) {
def isStatic: Boolean = !isDynamic
def isDynamic: Boolean = ctx.regexpLiteralContent.asScala.exists(c => Option(c.compoundStatement()).isDefined)

def interpolations: List[ParserRuleContext] = ctx
.regexpLiteralContent()
.asScala
.filter(ctx => Option(ctx.compoundStatement()).isDefined)
.map(ctx => ctx.compoundStatement())
.toList
}

sealed implicit class CurlyBracesBlockContextHelper(ctx: CurlyBracesBlockContext) {
Expand All @@ -104,12 +111,11 @@ object AntlrContextHelpers {
}

sealed implicit class CommandArgumentContextHelper(ctx: CommandArgumentContext) {
def arguments: List[ParserRuleContext] = ctx match
def arguments: List[ParserRuleContext] = ctx match {
case ctx: CommandCommandArgumentListContext => ctx.command() :: Nil
case ctx: CommandArgumentCommandArgumentListContext => ctx.commandArgumentList().elements
case ctx =>
logger.warn(s"Unsupported argument type ${ctx.getClass}")
List()
case ctx => Nil
}
}

sealed implicit class CommandArgumentListContextHelper(ctx: CommandArgumentListContext) {
Expand Down Expand Up @@ -164,7 +170,7 @@ object AntlrContextHelpers {
case ctx: SplattingArgumentIndexingArgumentListContext => ctx.splattingArgument() :: Nil
case ctx: OperatorExpressionListWithSplattingArgumentIndexingArgumentListContext => ctx.splattingArgument() :: Nil
case ctx =>
logger.warn(s"Unsupported argument type ${ctx.getClass}")
logger.warn(s"IndexingArgumentListContextHelper - Unsupported argument type ${ctx.getClass}")
List()
}

Expand All @@ -173,7 +179,7 @@ object AntlrContextHelpers {
case _: EmptyArgumentWithParenthesesContext => List()
case ctx: ArgumentListArgumentWithParenthesesContext => ctx.argumentList().elements
case ctx =>
logger.warn(s"Unsupported argument type ${ctx.getClass}")
logger.warn(s"ArgumentWithParenthesesContextHelper - Unsupported argument type ${ctx.getClass}")
List()
}

Expand All @@ -189,8 +195,10 @@ object AntlrContextHelpers {
Option(ctx.associationList()).map(_.associations).getOrElse(List.empty)
case ctx: SplattingArgumentArgumentListContext =>
Option(ctx.splattingArgument()).toList
case ctx: BlockArgumentArgumentListContext =>
Option(ctx.blockArgument()).toList
case ctx =>
logger.warn(s"Unsupported element type ${ctx.getClass.getSimpleName}")
logger.warn(s"ArgumentListContextHelper - Unsupported element type ${ctx.getClass.getSimpleName}")
List()
}
}
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
package io.joern.rubysrc2cpg.parser

import better.files.File
import org.antlr.v4.runtime.*
import org.antlr.v4.runtime.atn.{ATN, ATNConfigSet}
import org.antlr.v4.runtime.dfa.DFA
import org.slf4j.LoggerFactory

import java.io.File.separator
import java.util
import scala.collection.mutable.ListBuffer
import scala.util.Try

/** A consumable wrapper for the RubyParser class used to parse the given file and be disposed thereafter.
* @param inputDir
* the directory of the target to parse.
* @param filename
* the file path to the file to be parsed.
*/
class AntlrParser(filename: String) {
class AntlrParser(inputDir: File, filename: String) {

private val charStream = CharStreams.fromFileName(filename)
private val lexer = new RubyLexer(charStream)
Expand All @@ -32,7 +35,8 @@ class AntlrParser(filename: String) {
msg: String,
e: RecognitionException
): Unit = {
val errorMessage = s"Syntax error on $filename:$line:$charPositionInLine"
val errorMessage =
s"Syntax error on ${filename.stripPrefix(s"${inputDir.pathAsString}$separator")}:$line:$charPositionInLine"
errors.append(errorMessage)
}

Expand Down Expand Up @@ -85,8 +89,9 @@ class ResourceManagedParser(clearLimit: Double) extends AutoCloseable {
private var maybeDecisionToDFA: Option[Array[DFA]] = None
private var maybeAtn: Option[ATN] = None

def parse(filename: String): Try[RubyParser.ProgramContext] = {
val antlrParser = AntlrParser(filename)
def parse(inputFile: File, filename: String): Try[RubyParser.ProgramContext] = {
val inputDir = if inputFile.isDirectory then inputFile else inputFile.parent
val antlrParser = AntlrParser(inputDir, filename)
val interp = antlrParser.parser.getInterpreter
// We need to grab a live instance in order to get the static variables as they are protected from static access
maybeDecisionToDFA = Option(interp.decisionToDFA)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -354,8 +354,7 @@ class RubyNodeCreator extends RubyParserBaseVisitor[RubyNode] {
if (ctx.isStatic) {
StaticLiteral(getBuiltInType(Defines.Regexp))(ctx.toTextSpan)
} else {
logger.warn(s"Unhandled regular expression literal '${ctx.toTextSpan}'")
Unknown()(ctx.toTextSpan)
DynamicLiteral(getBuiltInType(Defines.Regexp), ctx.interpolations.map(visit))(ctx.toTextSpan)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,5 +37,6 @@ object Defines {
val association = "<operator>.association"
val splat = "<operator>.splat"
val regexpMatch = "=~"
val regexpNotMatch = "!~"
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package io.joern.rubysrc2cpg.querying

import io.joern.rubysrc2cpg.testfixtures.RubyCode2CpgFixture
import io.shiftleft.codepropertygraph.generated.Operators
import io.shiftleft.semanticcpg.language.*

class LiteralTests extends RubyCode2CpgFixture {
Expand Down Expand Up @@ -221,4 +222,28 @@ class LiteralTests extends RubyCode2CpgFixture {
literal.lineNumber shouldBe Some(2)
literal.typeFullName shouldBe "__builtin.Regexp"
}

"`/#{os_version_regex}/` is represented by a CALL node with a string format method full name" in {
val cpg = code("""
|os_version_regex = "1.2.0"
|/#{os_version_regex}/
|""".stripMargin)

val List(formatValueCall) = cpg.call.code("/#.*").l
formatValueCall.code shouldBe "/#{os_version_regex}/"
formatValueCall.lineNumber shouldBe Some(3)
formatValueCall.typeFullName shouldBe "__builtin.Regexp"
formatValueCall.methodFullName shouldBe Operators.formatString
}

"regex values in a hash literal" ignore {
val cpg = code("""
|PLATFORM_PATTERNS = {
| :redhat => /fedora|el-|centos/
|}
|""".stripMargin)

cpg.method(":program").dotAst.foreach(println)
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -418,4 +418,24 @@ class MethodReturnTests extends RubyCode2CpgFixture(withDataFlow = true) {
}
}

"implicit return of a heredoc should return a literal" in {
val cpg = code("""
|def custom_fact_content(key='custom_fact', value='custom_value', *args)
| <<-EOM
| Facter.add('#{key}') do
| setcode {'#{value}'}
| #{args.empty? ? '' : args.join('\n')}
| end
| EOM
|end
|""".stripMargin)

inside(cpg.method.nameExact("custom_fact_content").methodReturn.toReturn.astChildren.l) {
case (heredoc: Literal) :: Nil =>
heredoc.typeFullName shouldBe "__builtin.String"
heredoc.code should startWith("<<-EOM")
case xs => fail(s"Expected a single literal node, instead got [${xs.code.mkString(", ")}]")
}
}

}

0 comments on commit 29740f9

Please sign in to comment.