Skip to content

Commit

Permalink
Reintroduce Python Compatibility Changes (#2785)
Browse files Browse the repository at this point in the history
* Make pysrc2cpg compatible with closed source data flow tracker again. (#2680)

* ossdataflowengine: fix edge case when tracking from receivers (#2722)

* Type Recovery Tweaks
* Added tests that were resulting in some failure
* Added support for string constant type hints (string literals are valid annotations in Python, e.g. for forward references)
* Using `ForkJoinParallelCpgPass` for `DynamicTypeHintFullNamePass`
* Making sure that `DynamicTypeHintFullNamePass` considers types within the current scope
* Sharing the logic for import path resolution between `XInheritanceFullNamePass` and `DynamicTypeHintFullNamePass`
  • Loading branch information
DavidBakerEffendi authored Jun 2, 2023
1 parent 6afe169 commit af9761d
Show file tree
Hide file tree
Showing 14 changed files with 324 additions and 184 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ class SourceToStartingPoints(src: StoredNode) extends RecursiveTask[List[CfgNode
withFieldAndIndexAccesses(
List(x).collectAll[CfgNode].toList ++ x.refsTo.collectAll[Local].flatMap(sourceToStartingPoints)
) ++ x.refsTo.capturedByMethodRef.referencedMethod.flatMap(m => usagesForName(x.name, m))
case x: Call =>
(x._receiverIn.l :+ x).collect { case y: CfgNode => y }
case x => List(x).collect { case y: CfgNode => y }
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class ContextStack {
}

private class MethodContext(
val name: String,
val scopeName: Option[String],
val astParent: nodes.NewNode,
val order: AutoIncIndex,
val isClassBodyMethod: Boolean = false,
Expand All @@ -46,7 +46,7 @@ class ContextStack {
) extends Context {}

private class ClassContext(
val name: String,
val scopeName: Option[String],
val astParent: nodes.NewNode,
val order: AutoIncIndex,
val variables: mutable.Map[String, nodes.NewNode] = mutable.Map.empty,
Expand Down Expand Up @@ -92,23 +92,30 @@ class ContextStack {
}

/** Creates a MethodContext for `methodNode` and pushes it onto the context stack.
  *
  * The new context is flagged as a class-body method when the current head of `stack` is a ClassContext. It is
  * constructed with a fresh `AutoIncIndex(1)`, the method block node wrapped in `Some`, and the optional method
  * reference node. If `moduleMethodContext` is still empty, the new context is also stored there before the push.
  *
  * NOTE(review): this span looks like a rendered diff hunk — both a `name: String` and a
  * `scopeName: Option[String]` parameter line, and two variants of the constructor call, are visible. Confirm which
  * variant is live before relying on the signature.
  */
def pushMethod(
name: String,
scopeName: Option[String],
methodNode: nodes.NewMethod,
methodBlockNode: nodes.NewBlock,
methodRefNode: Option[nodes.NewMethodRef]
): Unit = {
// True when the context currently at the head of the stack is a class context.
val isClassBodyMethod = stack.headOption.exists(_.isInstanceOf[ClassContext])

val methodContext =
new MethodContext(name, methodNode, new AutoIncIndex(1), isClassBodyMethod, Some(methodBlockNode), methodRefNode)
new MethodContext(
scopeName,
methodNode,
new AutoIncIndex(1),
isClassBodyMethod,
Some(methodBlockNode),
methodRefNode
)
// Only the first method context pushed while `moduleMethodContext` is unset gets recorded there.
if (moduleMethodContext.isEmpty) {
moduleMethodContext = Some(methodContext)
}
push(methodContext)
}

// Pushes a new ClassContext for `classNode`, constructed with a fresh `AutoIncIndex(1)`, onto the context stack.
// NOTE(review): two variants of this definition are visible (one taking `name: String`, one taking
// `scopeName: Option[String]`); this looks like the removed/added lines of a rendered diff — confirm which is live.
def pushClass(name: String, classNode: nodes.NewTypeDecl): Unit = {
push(new ClassContext(name, classNode, new AutoIncIndex(1)))
def pushClass(scopeName: Option[String], classNode: nodes.NewTypeDecl): Unit = {
push(new ClassContext(scopeName, classNode, new AutoIncIndex(1)))
}

def pushSpecialContext(): Unit = {
Expand Down Expand Up @@ -243,7 +250,7 @@ class ContextStack {
*/
def considerAsGlobalVariable(lhs: NewNode): Unit = {
lhs match {
// A NewIdentifier is registered through addGlobalVariable only when the enclosing method context is "<module>".
// NOTE(review): two guard variants are visible (`.name == "<module>"` and `.scopeName.contains("<module>")`);
// this looks like the removed/added lines of a rendered diff — confirm which one is live.
case n: NewIdentifier if findEnclosingMethodContext(stack).name == "<module>" =>
case n: NewIdentifier if findEnclosingMethodContext(stack).scopeName.contains("<module>") =>
addGlobalVariable(n.name)
// Everything else is ignored.
case _ =>
}
}
Expand Down Expand Up @@ -376,13 +383,12 @@ class ContextStack {
def qualName: String = {
stack
.flatMap {
case methodContext: MethodContext if !methodContext.isClassBodyMethod =>
Some(methodContext.name)
case methodContext: MethodContext =>
methodContext.scopeName
case specialBlockContext: SpecialBlockContext =>
None
case classContext: ClassContext =>
Some(classContext.name)
case _: MethodContext => None
classContext.scopeName
}
.reverse
.mkString(".")
Expand All @@ -407,10 +413,9 @@ class ContextStack {
}

// Reports whether the current position on the context stack counts as being inside a class.
// NOTE(review): two variants of the check are visible, which looks like the removed/added lines of a rendered diff:
//   - one looks at the element after the head (`stack.tail`) and tests whether it is a ClassContext;
//   - the other looks at the head itself and tests for a MethodContext with `isClassBodyMethod` set.
// Confirm which variant is live.
def isClassContext: Boolean = {
val stackTail = stack.tail
stackTail.nonEmpty && (stackTail.headOption match {
case Some(_: ClassContext) => true
case _ => false
// The `nonEmpty` test short-circuits, so `stack.head` is never evaluated on an empty stack.
stack.nonEmpty && (stack.head match {
case methodContext: MethodContext if methodContext.isClassBodyMethod => true
case _ => false
})
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
package io.joern.pysrc2cpg

import io.joern.x2cpg.passes.frontend.ImportStringHandling
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.codepropertygraph.generated.PropertyNames
import io.shiftleft.codepropertygraph.generated.nodes.StoredNode
import io.shiftleft.passes.CpgPass
import io.shiftleft.codepropertygraph.generated.nodes.{CfgNode, MethodParameterIn, MethodReturn, StoredNode}
import io.shiftleft.passes.ForkJoinParallelCpgPass
import io.shiftleft.semanticcpg.language._
import overflowdb.BatchedUpdate

Expand All @@ -13,48 +14,65 @@ import java.util.regex.{Matcher, Pattern}
/** The type hints we pick up via the parser are not full names. This pass fixes that by retrieving the import for each
* dynamic type hint and adjusting the dynamic type hint full name field accordingly.
*/
class DynamicTypeHintFullNamePass(cpg: Cpg) extends CpgPass(cpg) {
override def run(diffGraph: BatchedUpdate.DiffGraphBuilder): Unit = {
val fileToImports = cpg.imports.l
.flatMap { imp =>
imp.call.file.l.map { f => f.name -> imp }
}
.groupBy(_._1)
.view
.mapValues(_.map(_._2))
class DynamicTypeHintFullNamePass(cpg: Cpg) extends ForkJoinParallelCpgPass[CfgNode](cpg) {

for {
methodReturn <- cpg.methodReturn.filter(x => x.dynamicTypeHintFullName.nonEmpty)
typeHint <- methodReturn.dynamicTypeHintFullName
file <- methodReturn.file
imports <- fileToImports.get(file.name)
importedEntity <- imports.filter { x =>
// TODO: Handle * imports correctly
x.importedAs.exists { imported => typeHint.matches(Pattern.quote(imported) + "(\\..+)*") }
}.importedEntity
} {
setTypeHints(diffGraph, methodReturn, typeHint, typeHint, importedEntity)
/** Pairs the full name of an importable entity with the alias under which it is referred to; either side may be
  * absent.
  */
private case class ImportScope(
  entity: Option[String],
  alias: Option[String]
)

/** Import scopes grouped by the name of the file that contains the import's call node.
  *
  * The grouping itself is computed once; because of `.view.mapValues`, the conversion of the grouped imports into
  * `ImportScope` values is applied lazily whenever a key is looked up.
  */
private val fileToImports = {
  val importsByFileName = for {
    imp  <- cpg.imports.l
    file <- imp.call.file.l
  } yield file.name -> imp

  importsByFileName
    .groupBy { case (fileName, _) => fileName }
    .view
    .mapValues { pairs =>
      pairs.map { case (_, imp) => ImportScope(imp.importedEntity, imp.importedAs) }
    }
}

/** Collects every method return followed by every method parameter whose `typeFullName` differs from
  * `Constants.ANY` and returns them as the parts of this pass.
  */
override def generateParts(): Array[CfgNode] = {
  val typedReturns = cpg.methodReturn.filterNot(_.typeFullName == Constants.ANY)
  (typedReturns ++ cpg.parameter.filterNot(_.typeFullName == Constants.ANY)).toArray
}

/** Dispatches a part to the handler for its node type. Parts that are neither a `MethodReturn` nor a
  * `MethodParameterIn` are ignored.
  */
override def runOnPart(builder: DiffGraphBuilder, part: CfgNode): Unit =
  part match {
    case ret: MethodReturn        => runOnMethodReturn(builder, ret)
    case param: MethodParameterIn => runOnMethodParameter(builder, param)
    case _                        => ()
  }

for {
param <- cpg.parameter.filter(x => x.dynamicTypeHintFullName.nonEmpty)
typeHint <- param.dynamicTypeHintFullName
file <- param.file
imports <- fileToImports.get(file.name)
importDetails <- imports
// TODO: Handle * imports correctly
.filter(_.importedAs.exists { imported => typeHint.matches(Pattern.quote(imported) + "(\\..+)*") })
.map(i => (i.importedEntity, i.importedAs))
} {
importDetails match {
case (Some(importedEntity), Some(importedAs)) =>
setTypeHints(diffGraph, param, typeHint, importedAs, importedEntity)
case _ =>
}
/** Resolves the type hint stored in a method return's `typeFullName` against the scopes visible in its file.
  *
  * The candidate scopes are the imports recorded for the file plus one scope derived from the type declaration of the
  * enclosing method. For every candidate whose alias matches the hint and that carries an entity, `setTypeHints` is
  * invoked.
  *
  * NOTE(review): the whole type hint is passed as the `alias` argument of `setTypeHints`, whereas
  * `runOnMethodParameter` passes the matched alias — confirm this asymmetry is intended.
  */
private def runOnMethodReturn(diffGraph: DiffGraphBuilder, methodReturn: MethodReturn): Unit =
  for (file <- methodReturn.file) {
    val typeHint = methodReturn.typeFullName

    // An alias matches when the hint equals it, or continues after it with "." and at least one more character.
    def refersTo(alias: String): Boolean = typeHint.matches(Pattern.quote(alias) + "(\\..+)*")

    val fileScopes = fileToImports.getOrElse(file.name, List.empty)
    val enclosingTypeScopes = methodReturn.method.typeDecl
      .map(td => ImportScope(Option(pythonicTypeNameToImport(td.fullName)), Option(td.name)))
      .toList

    // TODO: Handle * imports correctly
    for {
      scope          <- fileScopes ++ enclosingTypeScopes if scope.alias.exists(refersTo)
      importedEntity <- scope.entity
    } setTypeHints(diffGraph, methodReturn, typeHint, typeHint, importedEntity)
  }

/** Resolves the type hint stored in a parameter's `typeFullName` against the scopes visible in its file.
  *
  * The candidate scopes are the imports recorded for the file plus one scope derived from the type declaration of the
  * enclosing method. For every candidate whose alias matches the hint and that has both an entity and an alias,
  * `setTypeHints` is invoked with that alias and entity.
  */
private def runOnMethodParameter(diffGraph: DiffGraphBuilder, param: MethodParameterIn): Unit =
  for (file <- param.file) {
    val typeHint = param.typeFullName

    // An alias matches when the hint equals it, or continues after it with "." and at least one more character.
    def refersTo(alias: String): Boolean = typeHint.matches(Pattern.quote(alias) + "(\\..+)*")

    val fileScopes = fileToImports.getOrElse(file.name, List.empty)
    val enclosingTypeScopes = param.method.typeDecl
      .map(td => ImportScope(Option(pythonicTypeNameToImport(td.fullName)), Option(td.name)))
      .toList

    // TODO: Handle * imports correctly
    for {
      scope          <- fileScopes ++ enclosingTypeScopes if scope.alias.exists(refersTo)
      importedEntity <- scope.entity
      importedAs     <- scope.alias
    } setTypeHints(diffGraph, param, typeHint, importedAs, importedEntity)
  }

}
/** Turns a type declaration full name into a dotted import path: the first occurrence of ".py:<module>" is removed
  * and every platform file separator is replaced by ".".
  */
private def pythonicTypeNameToImport(fullName: String): String = {
  val withoutModuleMarker = fullName.replaceFirst("\\.py:<module>", "")
  // Literal (non-regex) replacement of each separator.
  withoutModuleMarker.replace(File.separator, ".")
}

private def setTypeHints(
diffGraph: BatchedUpdate.DiffGraphBuilder,
Expand All @@ -63,17 +81,21 @@ class DynamicTypeHintFullNamePass(cpg: Cpg) extends CpgPass(cpg) {
alias: String,
importedEntity: String
) = {
val typeFullName = typeHint.replaceFirst(Pattern.quote(alias), importedEntity)
val typeFilePath = typeFullName.replaceAll("\\.", Matcher.quoteReplacement(File.separator))
val pythonicTypeFullName = typeFullName.split("\\.").lastOption match {
val importFullPath = ImportStringHandling.combinedPath(importedEntity, typeHint)
val typeHintFullName = typeHint.replaceFirst(Pattern.quote(alias), importedEntity)
val typeFilePath = typeHintFullName.replaceAll("\\.", Matcher.quoteReplacement(File.separator))
val pythonicTypeFullName = importFullPath.split("\\.").lastOption match {
case Some(typeName) =>
typeFilePath.stripSuffix(s"${File.separator}$typeName").concat(s".py:<module>.$typeName")
case None => typeFullName
case None => typeHintFullName
}
cpg.typeDecl.fullName(s".*${Pattern.quote(pythonicTypeFullName)}").l match {
case xs if xs.sizeIs == 1 =>
diffGraph.setNodeProperty(node, PropertyNames.TYPE_FULL_NAME, xs.fullName.head)
case xs if xs.nonEmpty =>
diffGraph.setNodeProperty(node, PropertyNames.DYNAMIC_TYPE_HINT_FULL_NAME, xs.fullName.toSeq)
case _ => diffGraph.setNodeProperty(node, PropertyNames.DYNAMIC_TYPE_HINT_FULL_NAME, Seq(pythonicTypeFullName))
case _ =>
diffGraph.setNodeProperty(node, PropertyNames.TYPE_FULL_NAME, pythonicTypeFullName)
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -140,40 +140,49 @@ class NodeBuilder(diffGraph: DiffGraphBuilder) {
.name(name)
.code(name)
.evaluationStrategy(EvaluationStrategies.BY_SHARING)
.typeFullName(Constants.ANY)
.typeFullName(extractTypesFromHint(typeHint).getOrElse(Constants.ANY))
.isVariadic(isVariadic)
.lineNumber(lineAndColumn.line)
.columnNumber(lineAndColumn.column)
index.foreach(idx => methodParameterNode.index(idx))
methodParameterNode.dynamicTypeHintFullName(extractTypesFromHint(typeHint))
addNodeToDiff(methodParameterNode)
}

// Derives a type name from an optional type-hint expression.
//   - ast.Name: the name's id.
//   - ast.Attribute: the recursively extracted name of `attr.value`, followed by "." and `attr.attr`.
//   - ast.Subscript whose value is an ast.Name: that name's id (the subscript's slice is not inspected).
//   - ast.Constant whose value is an ast.StringConstant: the string's value.
//   - anything else, or no hint at all: an empty result.
// Names found in `allBuiltinClasses` are prefixed with `builtinPrefix`, names found in `typingClassesV3` with
// `typingPrefix`; other names are returned unchanged.
// NOTE(review): this span looks like a rendered diff hunk — both a `Seq[String]` and an `Option[String]` variant of
// the signature and of several branches are visible. Confirm which variant is live.
def extractTypesFromHint(typeHint: Option[ast.iexpr] = None): Seq[String] = {
def extractTypesFromHint(typeHint: Option[ast.iexpr] = None): Option[String] = {
typeHint match {
case Some(hint) =>
val nameSequence = hint match {
case n: ast.Name => Seq(n.id)
case n: ast.Name => Option(n.id)
// TODO: Definitely a place for follow up handling of generics - currently only take the polymorphic type
// without type args. To see the type arguments, see ast.Subscript.slice
case attr: ast.Attribute =>
extractTypesFromHint(Some(attr.value)).map { x => x + "." + attr.attr }
case n: ast.Subscript if n.value.isInstanceOf[ast.Name] => Seq(n.value.asInstanceOf[ast.Name].id)
case _ => Seq[String]()
case n: ast.Subscript if n.value.isInstanceOf[ast.Name] => Option(n.value.asInstanceOf[ast.Name].id)
case n: ast.Constant if n.value.isInstanceOf[ast.StringConstant] =>
Option(n.value.asInstanceOf[ast.StringConstant].value)
case _ => None
}
// Qualify well-known names; the builtin check takes precedence over the typing check.
nameSequence.map { typeName =>
if (allBuiltinClasses.contains(typeName)) s"$builtinPrefix$typeName"
else if (typingClassesV3.contains(typeName)) s"$typingPrefix$typeName"
else typeName
}
case _ =>
Seq()
case _ => None
}
}

def methodReturnNode(dynamicTypeHintFullName: Option[String], lineAndColumn: LineAndColumn): nodes.NewMethodReturn = {
def methodReturnNode(
staticTypeHint: Option[String],
dynamicTypeHintFullName: Option[String],
lineAndColumn: LineAndColumn
): nodes.NewMethodReturn = {
val methodReturnNode = NodeBuilders
.newMethodReturnNode(Constants.ANY, dynamicTypeHintFullName, Some(lineAndColumn.line), Some(lineAndColumn.column))
.newMethodReturnNode(
staticTypeHint.getOrElse(Constants.ANY),
dynamicTypeHintFullName,
Some(lineAndColumn.line),
Some(lineAndColumn.column)
)
.evaluationStrategy(EvaluationStrategies.BY_SHARING)

addNodeToDiff(methodReturnNode)
Expand Down
Loading

0 comments on commit af9761d

Please sign in to comment.