In [11]:
import base64
import os
from tqdm import tqdm
from py2neo import Graph

# Directory holding one static-analysis pass log per module.
PASS_LOG = r'/data2/zhangzheng1/data/code/exiv2-0.26/PASS_LOG'
# Runtime logs live in a sub-entry of PASS_LOG; the parsing cell skips it by name.
RUNTIME_LOG = PASS_LOG + '/RUNTIME_LOG'
# Names of the comparison routines searched for in the pass logs.
CMP_CALLS = [
    "strcmp",
    "strncmp",
    "strcasecmp",
    "strncasecmp",
    "memcmp",
]
# Project identity used for the Project node in the graph.
PROJECT = 'exiv2'
# NOTE(review): kept as a float; it is interpolated into strings downstream,
# so a version such as 0.30 would render as "0.3" — consider a string.
VERSION = 0.26

In [12]:
# Load the database password, stored base64-encoded in ./PASSWORD.
with open('./PASSWORD', 'r') as fp:
    # Trim surrounding whitespace, then drop any newlines inside the payload.
    data = ''.join(fp.read().strip().split('\n'))
# Decoding needs no open file handle, so it happens after the file is closed.
passwd = base64.b64decode(data).decode()

In [13]:
# Parse every per-module pass log in PASS_LOG into the lookup tables below.
# Recognised line prefixes:
#   "[F] <func>"                 starts a new function
#   "[BB] ... <block>|<succ>|…"  starts a new basic block and lists its successors
#   "[BBC] <callee>"             records a call made from the current basic block
# Any other line that mentions one of CMP_CALLS is recorded as a comparison call;
# everything else is reported with a "[!]" message.
modSet = set()  # all modules
funcSet = set()  # all functions, keyed "module|function"
blkSet = set()  # all basic blocks, keyed "module|function|block"
mod2func = {}  # module m -> names of all functions in m
func2blk = {}  # function f (full name) -> all IR basic blocks of f, 1:n
blk2call = {}  # basic block b (full name) -> all functions called in b, 1:n
blk2succ = {}  # basic block b (full name) -> all successor basic blocks of b
func2call = {}  # function f (full name) -> all functions called by f, 1:n
func2cmp = {}  # function f (full name) -> log lines naming a cmp call in f, 1:n
curFunc = None
curBB = None
isInHis = False  # whether the current function is already in funcSet (never read in this cell)

for f in tqdm(os.listdir(PASS_LOG)):
    if f == 'RUNTIME_LOG':
        continue
    with open(os.path.join(PASS_LOG, f), 'r') as fp:
        # Module name = file name minus its last 4 characters
        # (presumably a 4-character extension — TODO confirm).
        m = f[:-4]
        modSet.add(m)  # modSet
        assert m not in mod2func, '[!] {} {}'.format(f, m)
        mod2func[m] = []
        # NOTE(review): curFunc / curBB carry over from the previous file, so a
        # "[BB]"/"[BBC]" line appearing before the first "[F]" of a file would be
        # attributed to the previous module's function — confirm logs never do this.
        for line in fp:
            # Strip only the newline terminator: slicing off the last character
            # unconditionally would truncate a final line that has no newline.
            line = line.rstrip('\n')
            # Tags are matched as prefixes because the payload is sliced at a
            # fixed offset right after the tag.
            if line.startswith('[F] '):
                funcName = line[4:]
                curFunc = m + '|' + funcName  # moduleName|funcName
                if curFunc in funcSet:
                    # Same function name seen twice within one module.
                    raise Exception("[!] {}".format(line))
                funcSet.add(curFunc)  # funcSet
                mod2func[m].append(funcName)  # mod2func
            elif line.startswith('[BB] '):
                # Last space-separated token is "block|succ1|succ2|...".
                bbFields = line.split(' ')[-1].split('|')
                bbName = bbFields[0]
                func2blk.setdefault(curFunc, []).append(bbName)  # func2blk
                curBB = curFunc + '|' + bbName
                blkSet.add(curBB)  # blkSet
                assert curBB not in blk2succ, '[!] {} {}'.format(curBB, line)
                blk2succ[curBB] = bbFields[1:]  # blk2succ
            elif line.startswith('[BBC] '):
                bbCallFuncName = line[6:]
                blk2call.setdefault(curBB, []).append(bbCallFuncName)  # blk2call
                func2call.setdefault(curFunc, []).append(bbCallFuncName)  # func2call
            elif any(n in line for n in CMP_CALLS):
                func2cmp.setdefault(curFunc, []).append(line)  # func2cmp
            else:
                # Unrecognised line: report it and keep going rather than abort.
                print("[!] {} {} {} {}".format(f, curFunc, curBB, line))

100%|██████████| 72/72 [00:00<00:00, 171.03it/s]


In [19]:
# Connect to the local Neo4j instance (Graph is imported in the first cell).
graph = Graph("bolt://127.0.0.1:7687", username="neo4j", password=passwd)
# WARNING: wipes the entire database before the import below.
# DETACH DELETE removes each node together with its relationships, so a
# separate relationship-deletion pass is not needed.
graph.run('MATCH (n) DETACH DELETE n')

<py2neo.database.Cursor at 0x7f9b9a8a7fd0>

导入静态分析的结果

In [20]:
# Create the root Project node if it does not exist yet. Values are bound as
# query parameters instead of being spliced into the Cypher text, so quotes or
# backslashes in a value cannot break the statement. str(VERSION) keeps the
# stored property a string, exactly as the quoted literal did before.
graph.run(
    'MERGE (:Project { name: $name, version: $version })',
    {'name': PROJECT, 'version': str(VERSION)},
)

<py2neo.database.Cursor at 0x7f9b9a9456d0>

In [21]:
# Create one Module node per parsed module and link it to the Project node.
# Values are passed as query parameters rather than formatted into the Cypher
# text, so special characters in a name cannot break or alter the statement.
for m in tqdm(mod2func):
    graph.run(
        'MERGE (:Module { name: $module, inProject: $project })',
        {'module': m, 'project': PROJECT},
    )
    graph.run(
        'MATCH (p:Project { name: $project, version: $version }), '
        '(m:Module { name: $module, inProject: $project }) '
        'MERGE (p)-[:hasModule]->(m)',
        # str(VERSION) matches the string-valued version stored on the Project node.
        {'project': PROJECT, 'version': str(VERSION), 'module': m},
    )

100%|██████████| 71/71 [00:01<00:00, 39.42it/s]


In [22]:
# Create one Function node per (module, function) pair and link it to its Module.
# Function names come straight from the logs, so they are bound as query
# parameters: a quote or backslash in a name would otherwise corrupt the
# string-built Cypher statement.
# NOTE(review): this cell took minutes on the recorded run; each MERGE looks
# nodes up by property, which presumably scans without an index on
# :Function / :Module — consider creating one before the import.
for m in tqdm(mod2func):
    for f in mod2func[m]:
        params = {'func': f, 'project': PROJECT, 'module': m}
        graph.run(
            'MERGE (:Function { name: $func, inProject: $project, inModule: $module })',
            params,
        )
        graph.run(
            'MATCH (m:Module { name: $module, inProject: $project }), '
            '(f:Function { name: $func, inProject: $project, inModule: $module }) '
            'MERGE (m)-[:hasFunction]->(f)',
            params,
        )

100%|██████████| 71/71 [05:20<00:00,  4.52s/it]
