-
Notifications
You must be signed in to change notification settings - Fork 0
/
astAnalyzers.py
190 lines (159 loc) · 8.38 KB
/
astAnalyzers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
from mainTool.udg.environments import CallEnvironment, UseDefEnvironment, ArgumentEnvironment, AssignmentEnvironment
from mainTool.udg.environments import IncDecEnvironment, DeclEnvironment, PtrMemberAccessEnvironment, MemberAccessEnvironment
from mainTool.udg.environments import UseEnvironment, ArrayIndexingEnvironment, UnaryOpEnvironment, IdentifierEnvironment
from mainTool.udg.astProvider import ASTProvider, ASTNodeASTProvider
from mainTool.udg.useDefGraph import UseOrDef, UseDefGraph
from mainTool.ast.astNode import ASTNode
from mainTool.cfg.CFG import *
from typing import Set, Dict, List
import sys
class CalleeInfos(object):
    """Per-callee record of which call arguments use or define pointer data.

    Plays the same role as TaintSource in Joern; the difference is that
    pointer *uses* are tracked here in addition to pointer *definitions*.
    """

    def __init__(self):
        # callee name -> indices of arguments whose pointee is used.
        # Example: "memset" -> [0] means memset(a, xx, xx) uses the symbol "* a".
        self.calleeToArgUseIds: Dict[str, List[int]] = {}
        # callee name -> indices of pointer arguments the callee redefines.
        # Example: "gets" -> [0] means gets(buf) redefines the symbol "* buf".
        self.calleeToArgDefIds: Dict[str, List[int]] = {}
        # Variadic callees: callee name -> first index from which every
        # argument is redefined.
        # Example: "scanf" -> 1 means scanf("%d", &a) redefines a.
        self.calleeToDefStartIds: Dict[str, int] = {}

    @staticmethod
    def _calleeName(callEnv: CallEnvironment) -> str:
        """Return the escaped code string of child 0 of the call's AST provider."""
        return callEnv.astProvider.getChild(0).getEscapedCodeStr()

    def judgeUse(self, callEnv: CallEnvironment, childNumber: int) -> bool:
        """Tell whether argument ``childNumber`` of the call is a registered pointer use."""
        registered = self.calleeToArgUseIds.get(self._calleeName(callEnv), ())
        return childNumber in registered

    def judgeDef(self, callEnv: CallEnvironment, childNumber: int) -> bool:
        """Tell whether argument ``childNumber`` of the call is a registered pointer definition.

        An argument counts when its index is listed explicitly for the callee,
        or when it is at or beyond the callee's variadic start index.
        """
        name = self._calleeName(callEnv)
        return (
            childNumber in self.calleeToArgDefIds.get(name, ())
            # sys.maxsize stands for "no variadic start index registered".
            or childNumber >= self.calleeToDefStartIds.get(name, sys.maxsize)
        )

    def addArgUse(self, callee: str, argN: int):
        """Register that ``callee`` uses the pointer passed as argument ``argN``."""
        self.calleeToArgUseIds.setdefault(callee, []).append(argN)

    def addArgDef(self, callee: str, argN: int):
        """Register that ``callee`` redefines the pointer passed as argument ``argN``."""
        self.calleeToArgDefIds.setdefault(callee, []).append(argN)

    def addArgDefStartIds(self, callee: str, argN: int):
        """Register that ``callee`` redefines every argument from index ``argN`` onwards."""
        self.calleeToDefStartIds[callee] = argN
class ASTDefUseAnalyzer(object):
    """Traverses an AST and collects the UseOrDef records it produces.

    One UseDefEnvironment is created per visited AST node. The environments
    of the nodes currently being visited are kept on ``environmentStack`` so
    that symbols can be handed from a child environment to its parent.
    """

    def __init__(self):
        # Environments of the nodes on the current traversal path (innermost last).
        self.environmentStack: List[UseDefEnvironment] = list()
        # UseOrDef records emitted during the current analyzeAST() run.
        self.useDefsOfBlock: Set[UseOrDef] = set()
        # Per-callee pointer use/def information consulted for call arguments.
        self.calleeInfos: CalleeInfos = CalleeInfos()

    def reset(self):
        """Discard all state left over from a previous analysis run."""
        self.environmentStack.clear()
        self.useDefsOfBlock.clear()

    def emitUseOrDefs(self, toEmit: List[UseOrDef]):
        """Add every record in ``toEmit`` to the result set of the current run."""
        self.useDefsOfBlock.update(toEmit)

    def reportUpstream(self, env: UseDefEnvironment):
        """Pass the upstream symbols of ``env`` to its parent environment.

        The parent is the environment on top of the stack; it receives the
        symbols through ``addChildSymbols``. Nothing is passed on when the
        stack is empty (``env`` belongs to the root node).
        """
        symbols: List[str] = env.upstreamSymbols()
        astProvider: ASTProvider = env.astProvider
        if self.environmentStack:
            parentEnv: UseDefEnvironment = self.environmentStack[-1]
            parentEnv.addChildSymbols(symbols, astProvider)

    def createArgumentEnvironment(self, astProvider: ASTProvider) -> ArgumentEnvironment:
        """Create the environment for an Argument node.

        The environment is flagged as pointer-using and/or pointer-defining
        according to what ``calleeInfos`` has registered for the enclosing call.
        """
        argEnv: ArgumentEnvironment = ArgumentEnvironment()
        # The call environment sits two levels up: the ArgumentList environment
        # lies between it and the argument. Without two enclosing environments
        # there is no callee to look up, so return the unflagged environment
        # instead of failing on the stack index.
        if len(self.environmentStack) < 2:
            return argEnv
        callEnv: CallEnvironment = self.environmentStack[-2]
        childNumber: int = astProvider.getChildNumber()
        # Does the callee use the pointer passed in this argument?
        if self.calleeInfos.judgeUse(callEnv, childNumber):
            argEnv.setIsUsePointer()
        # Does the callee define the pointer passed in this argument?
        if self.calleeInfos.judgeDef(callEnv, childNumber):
            argEnv.setIsDefPointer()
        return argEnv

    def createUseDefEnvironment(self, astProvider: ASTProvider) -> UseDefEnvironment:
        """Create the UseDefEnvironment matching the node type of ``astProvider``.

        Node types without a dedicated environment get a plain UseDefEnvironment.
        """
        nodeType: str = astProvider.getTypeAsString()
        if nodeType == "AssignmentExpr":
            return AssignmentEnvironment()
        if nodeType == "IncDecOp":
            return IncDecEnvironment()
        if nodeType in ("IdentifierDecl", "Parameter"):
            return DeclEnvironment()
        if nodeType == "CallExpression":
            return CallEnvironment()
        if nodeType == "Argument":
            return self.createArgumentEnvironment(astProvider)
        if nodeType == "PtrMemberAccess":
            return PtrMemberAccessEnvironment()
        if nodeType == "MemberAccess":
            return MemberAccessEnvironment()
        # Conditions and return statements only use symbols, they never define them.
        if nodeType in ("Condition", "ReturnStatement"):
            return UseEnvironment()
        if nodeType == "ArrayIndexing":
            return ArrayIndexingEnvironment()
        if nodeType == "UnaryOp":
            return UnaryOpEnvironment()
        if nodeType == "Identifier":
            return IdentifierEnvironment()
        return UseDefEnvironment()

    def traverseAST(self, astProvider: ASTProvider):
        """Create the environment for the node behind ``astProvider`` and visit its children."""
        env: UseDefEnvironment = self.createUseDefEnvironment(astProvider)
        env.astProvider = astProvider
        self.traverseASTChildren(astProvider, env)

    def traverseASTChildren(self, astProvider: ASTProvider, env: UseDefEnvironment):
        """Visit every child of the node, then report the node's symbols upstream.

        ``env`` is on the stack while the children are visited. After each
        child, the records ``env`` derives for that child are emitted.
        """
        numChildren: int = astProvider.getChildCount()
        self.environmentStack.append(env)
        for i in range(numChildren):
            childProvider: ASTProvider = astProvider.getChild(i)
            self.traverseAST(childProvider)
            toEmit: List[UseOrDef] = env.useOrDefsFromSymbols(childProvider)
            self.emitUseOrDefs(toEmit)
        # Pop before reporting so that the stack top is this node's parent.
        self.environmentStack.pop()
        self.reportUpstream(env)

    def analyzeAST(self, astProvider: ASTProvider) -> Set[UseOrDef]:
        """Analyze the AST behind ``astProvider`` and return its UseOrDef records.

        The returned set is a snapshot. The internal set is cleared in place by
        the next run, so returning it directly would silently empty any result
        a caller still holds.
        """
        self.reset()
        self.traverseAST(astProvider)
        return set(self.useDefsOfBlock)
class CFGToUDGConverter(object):
    """Converts a CFG into a UseDefGraph by analyzing each statement's AST."""

    def __init__(self):
        self.astAnalyzer: ASTDefUseAnalyzer = ASTDefUseAnalyzer()

    def addToUseDefGraph(self, useDefGraph: UseDefGraph, usesAndDefs: List[UseOrDef], statementNode: ASTNode):
        """Record the uses and definitions of one statement in ``useDefGraph``.

        Each symbol is linked to ``statementNode`` (the AST node held by the
        CFG node) at most once per kind. It is also linked to the AST node the
        record originates from, when that node exists and is not the statement
        node itself.

        ``usesAndDefs`` is only iterated, so any iterable of UseOrDef works;
        ``convert`` passes the set returned by the analyzer.
        """
        insertedForStatementDef: Set[str] = set()
        insertedForStatementUse: Set[str] = set()
        for useOrDef in usesAndDefs:
            astProvider = useOrDef.astProvider
            # Guard before dereferencing: a record without a provider has no
            # originating AST node, but still gets its statement-level link.
            useOrDefNode = astProvider.node if astProvider is not None else None
            if useOrDef.isDef:
                alreadyLinked, addLink = insertedForStatementDef, useDefGraph.addDefinition
            else:
                alreadyLinked, addLink = insertedForStatementUse, useDefGraph.addUse
            if useOrDef.symbol not in alreadyLinked:
                addLink(useOrDef.symbol, statementNode)
                alreadyLinked.add(useOrDef.symbol)
            # Link the originating AST node as well. The same identity-based
            # guard applies to uses and definitions, so a missing node is
            # never inserted into the graph.
            if useOrDefNode is not None and useOrDefNode is not statementNode:
                addLink(useOrDef.symbol, useOrDefNode)

    def convert(self, cfg: CFG) -> UseDefGraph:
        """Build a UseDefGraph from ``cfg``.

        The graph is created incrementally: the UseOrDef records of every
        statement are generated separately and then added to the graph.
        """
        useDefGraph: UseDefGraph = UseDefGraph()
        statements: List[CFGNode] = cfg.vertices
        # Every statement of the CFG is analyzed on its own.
        for cfgNode in statements:
            # Skip CFG nodes that carry no AST node (empty blocks).
            if not isinstance(cfgNode, ASTNodeContainer):
                continue
            statementNode: ASTNode = cfgNode.astNode
            provider: ASTNodeASTProvider = ASTNodeASTProvider()
            provider.node = statementNode
            usesAndDefs: Set[UseOrDef] = self.astAnalyzer.analyzeAST(provider)
            self.addToUseDefGraph(useDefGraph, usesAndDefs, statementNode)
        return useDefGraph