In [390]:
import os
from tqdm import tqdm

In [391]:
# Directory whose entries are parsed below: every file except 'RUNTIME_LOG'
# is read as the static log of one module.
PASS_LOG = r'/data2/zhangzheng1/data/code/exiv2-0.26/PASS_LOG'
# Names of comparison routines; a log line that carries none of the
# [F]/[BB]/[BBC] tags but contains one of these names is stored in func2cmp.
CMP_CALLS = ["strcmp", "strncmp", "strcasecmp", "strncasecmp", "memcmp"]

In [392]:
# Parse every static log under PASS_LOG into the lookup tables below.
# Keys written as "module|function" or "module|function|block" are the
# fully-qualified names built while parsing.
funcSet = set()  # every function seen, as "module|function"
mod2func = {}  # module m -> list of function names found in m's log
func2blk = {}  # "module|function" -> list of its IR basic-block names (1:n)
blk2call = {}  # "module|function|block" -> functions called in that block (1:n)
blk2succ = {}  # "module|function|block" -> names of its successor blocks
func2call = {}  # "module|function" -> functions it calls (1:n)
func2cmp = {}  # "module|function" -> raw log lines naming a CMP_CALLS routine (1:n)
curFunc = None
curBB = None
isInHis = False  # "current function already in funcSet" flag; never read in this cell
for f in tqdm(os.listdir(PASS_LOG)):
    if f == 'RUNTIME_LOG':
        # The runtime trace lives in the same directory but is not a static log.
        continue
    with open(os.path.join(PASS_LOG, f), 'r') as fp:
        # Module name = file name minus its last four characters
        # (presumably a 3-letter extension plus the dot -- TODO confirm).
        m = f[:-4]
        assert m not in mod2func, '[!] {} {}'.format(f, m)
        mod2func[m] = []
        for line in fp:
            # Drop only the line terminator. Slicing off the last character
            # unconditionally would truncate a final line that has no '\n'.
            line = line.rstrip('\n')
            # NOTE(review): tags are matched anywhere in the line, yet the
            # payload is sliced from a fixed offset, so the tag is presumed to
            # sit at column 0.
            if '[F] ' in line:
                curFunc = m + '|' + line[4:]  # moduleName|funcName
                if curFunc in funcSet:
                    # Same function name twice within one module log.
                    raise Exception("[!] {}".format(line))
                funcSet.add(curFunc)
                mod2func[m].append(line[4:])
            elif '[BB] ' in line:
                # Last space-separated token is "block|succ1|succ2|...".
                bbFields = line.split(' ')[-1].split('|')
                bbName = bbFields[0]
                func2blk.setdefault(curFunc, []).append(bbName)
                curBB = curFunc + '|' + bbName
                assert curBB not in blk2succ, '[!] {} {}'.format(curBB, line)
                blk2succ[curBB] = bbFields[1:]
            elif '[BBC] ' in line:
                bbCallFuncName = line[6:]
                blk2call.setdefault(curBB, []).append(bbCallFuncName)
                func2call.setdefault(curFunc, []).append(bbCallFuncName)
            elif any(n in line for n in CMP_CALLS):
                func2cmp.setdefault(curFunc, []).append(line)
            else:
                # Unrecognised lines are reported rather than treated as fatal.
                print("[!] {} {} {} {}".format(f, curFunc, curBB, line))

100%|██████████| 72/72 [00:00<00:00, 161.56it/s]


In [393]:
# import matplotlib.pyplot as plt
# from collections import Counter
# with plt.style.context(['science', 'no-latex']):
#     a = Counter([len(func2call[f]) for f in func2call])
#     plt.bar(a.keys(), a.values())
#     plt.show()

In [394]:
# Functions that never appear as a key of func2call, i.e. they call nothing.
nonCallFuncSet = funcSet.difference(func2call)
print('[+] {} {} {}'.format(len(funcSet), len(func2call), len(nonCallFuncSet)))

[+] 8397 3461 4936


In [395]:
# func2cov: function f -> number of IR basic blocks reachable starting at f.
func2cov = {}
# Seed the table with the leaf functions: they call nothing, so their value
# is simply their own block count (0 when no block was logged for them).
for leaf in nonCallFuncSet:
    assert leaf not in func2cov
    func2cov[leaf] = len(func2blk.get(leaf, []))

In [396]:
def getFuncCov(f, s):
    """Add f and every function transitively called from f to the set s.

    Callee names found in func2call are resolved inside the caller's own
    module only, so every function reached shares f's module prefix.

    Args:
        f: fully-qualified function name, "module|function".
        s: set of already-visited fully-qualified names; updated in place.
           If f is already in s the call does nothing.

    Raises:
        Exception: when a visited function calls a name that is not listed in
            mod2func for its module. s may be partially updated at that point.
        KeyError: when callees exist but the module is missing from mod2func.
    """
    if f in s:
        return
    module = f.split('|')[0]
    known = None  # set of the module's function names, built on first use
    s.add(f)
    # Explicit work list instead of recursion, so that long call chains
    # cannot exceed the interpreter's recursion limit.
    pending = [f]
    while pending:
        callees = func2call.get(pending.pop(), [])
        if callees and known is None:
            known = set(mod2func[module])
        for cf in callees:
            if cf not in known:
                raise Exception('[!] Invoke External Functions.')
            callee = module + '|' + cf
            if callee not in s:
                s.add(callee)
                pending.append(callee)

In [397]:
# For every function that makes calls: gather the functions reachable from it
# (itself included) and store the sum of their basic-block counts.
for caller in tqdm(func2call):
    reachable = set()
    getFuncCov(caller, reachable)
    blockTotal = 0
    for fn in reachable:
        blockTotal += len(func2blk.get(fn, []))
    func2cov[caller] = blockTotal

100%|██████████| 3461/3461 [00:00<00:00, 37897.60it/s]


In [398]:
# The ten functions with the largest func2cov value, largest first.
sorted(func2cov, key=func2cov.get, reverse=True)[:10]

['epsimage.cpp|_ZN5Exiv28EpsImage12readMetadataEv',
 'epsimage.cpp|_ZN5Exiv28EpsImage13writeMetadataEv',
 'epsimage.cpp|_ZN12_GLOBAL__N_120readWriteEpsMetadataERN5Exiv27BasicIoERNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERSt6vectorINS0_13NativePreviewESaISB_EEb',
 'actions.cpp|_ZN6Action5Print12printSummaryEv',
 'exiv2.cpp|main',
 'rafimage.cpp|_ZN5Exiv28RafImage14printStructureERSoNS_20PrintStructureOptionEi',
 'exiv2.cpp|_ZN6Params6optionEiRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEi',
 'http.cpp|_ZN5Exiv24httpERSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES6_St4lessIS6_ESaISt4pairIKS6_S6_EEESE_RS6_',
 'jpgimage.cpp|_ZN5Exiv28JpegBase15doWriteMetadataERNS_7BasicIoE',
 'actions.cpp|_ZN6Action6Rename3runERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE']

分析运行时日志`RUNTIME_LOG`

In [399]:
# The runtime trace is the 'RUNTIME_LOG' entry of the PASS_LOG directory (the
# same entry the static-log parser skips). Derive it from PASS_LOG so that the
# two paths cannot drift apart.
RUNTIME_LOG = os.path.join(PASS_LOG, 'RUNTIME_LOG')

In [400]:
# Cross-check the runtime trace against the tables built from the static logs.
# A "[PC] " record ends in "module|function|block"; the module must be in
# mod2func, the function in funcSet and func2blk, and the block must be listed
# for that function. "[STRCMP] ", "[CMP] " and "[SWITCH] " records are accepted
# without inspection; any other line (including an empty one) is an error.
with open(RUNTIME_LOG, 'r') as fp:
    # Stream the trace line by line rather than loading it all with readlines().
    for line in fp:
        line = line.strip()
        if '[PC] ' in line:
            bbGName = line.split(' ')[-1]
            mn, fn, bn = bbGName.split('|')
            funcKey = '|'.join([mn, fn])
            assert mn in mod2func
            assert funcKey in funcSet
            assert funcKey in func2blk, '[!] {} {} {}'.format(line, fn, bn)
            assert bn in func2blk[funcKey], '[!] {} {} {}'.format(line, fn, bn)
        elif '[STRCMP] ' in line or '[CMP] ' in line or '[SWITCH] ' in line:
            pass
        else:
            raise Exception('[!] {}'.format(line))
    # Reached only when no assertion or exception fired for any line.
    print('[+] Pass Check, All Runtime Info Fits the Result of Static Analysis.')

AssertionError: [!] [PC] 0x41bc3f actions.cpp|_ZN6Action5Print3runERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE|emptyBB.1 _ZN6Action5Print3runERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE emptyBB.1