# 代码覆盖率
来源：[Code Coverage](https://www.fuzzingbook.org/html/Coverage.html)

In [None]:
def cgi_decode(s):
    """Decode a CGI-encoded (URL query style) string.

    '+' is replaced by a space, '%xx' (two hexadecimal digits, either
    case) is replaced by the character with that code, and every other
    character is copied through unchanged.

    Args:
        s: The encoded string.

    Returns:
        The decoded string.

    Raises:
        ValueError: If '%' is followed by two characters that are not
            both hexadecimal digits.
        IndexError: If '%' has fewer than two characters after it; the
            two-character lookahead is not bounds-checked.
    """
    # Value of every accepted hexadecimal digit, lower and upper case.
    hex_digits = {digit: int(digit, 16) for digit in "0123456789abcdefABCDEF"}

    # Characters that map directly to a replacement character.
    substitutions = {'+': ' '}

    pieces = []
    pos = 0
    length = len(s)
    while pos < length:
        current = s[pos]

        if current == '%':
            # Read both digits before validating either one, so a truncated
            # escape fails on the indexing itself.
            first = s[pos + 1]
            second = s[pos + 2]
            if first not in hex_digits or second not in hex_digits:
                raise ValueError("Invalid encoding")
            pieces.append(chr(hex_digits[first] * 16 + hex_digits[second]))
            pos += 3
            continue

        # Substituted characters and ordinary characters both consume
        # exactly one input character.
        pieces.append(substitutions.get(current, current))
        pos += 1

    return ''.join(pieces)

In [None]:
# Try cgi_decode on an input that mixes '%xx' escapes with a '+'.
cgi_decode("Hello%2c+world%21")

## 黑盒测试
针对特殊用例。黑盒测试的优点是它可以发现指定行为中的错误。它独立于给定的实现，因此允许在实现之前创建测试。缺点是，实现的行为通常比指定的行为涵盖更多的领域，因此仅基于规范的测试通常不能涵盖所有的实现细节。

In [None]:
# Black-box checks derived from the specification: '+' decoding, '%xx'
# decoding, and pass-through of ordinary characters.
assert cgi_decode("a+b") == "a b"
assert cgi_decode("a%21b") == "a!b"
assert cgi_decode("abc") == "abc"

# An invalid escape must be rejected with ValueError. The input keeps two
# characters after '%' so that the hex-digit check is what fails; the `else`
# branch makes the test fail if no exception is raised at all.
try:
    cgi_decode("%?a")
except ValueError:
    pass
else:
    raise AssertionError("cgi_decode accepted an invalid escape")

## 白盒测试
与黑盒测试相比，白盒测试从实现(特别是内部结构)派生测试。白盒测试与覆盖代码结构特性的概念紧密相关。例如，如果在测试期间没有执行代码中的语句，这意味着也不能触发该语句中的错误。因此，白盒测试引入了许多必须在测试被认为是充分的之前被满足的覆盖率标准。最经常使用的覆盖率标准是：

* 语句覆盖率：代码中的每个语句都必须由至少一个测试输入执行。
* 分支覆盖率：代码中的每个分支都必须由至少一个测试输入执行到。(也就是说，每一个 if 和 while 的判定都必须至少一次为真、一次为假。)

这里，上面的黑盒测试的测试用例已经覆盖了每一条语句。所以，白盒测试的过程和上面相同。

## 跟踪执行
背景：[sys.settrace(tracefunc)](https://docs.python.org/zh-cn/3/library/sys.html#sys.settrace)

trace函数：
The local trace function should return a reference to itself 
(or to another function for further tracing in that scope), 
or None to turn off tracing in that scope.

In [None]:
# Module-level list that collects the line numbers reported to the tracer.
coverage = []

In [None]:
def traceit(frame, event, arg):
    """Trace function for sys.settrace that records executed line numbers.

    For every "line" event the current line number of `frame` is appended
    to the module-level `coverage` list; all other events are ignored.

    Args:
        frame: The frame being traced.
        event: The trace event name, e.g. "call", "line" or "return".
        arg: Event-specific argument; unused here.

    Returns:
        This function itself, so tracing continues in the traced scope.
    """
    if event == "line":
        # append() mutates the existing list in place, so no `global`
        # declaration is required.
        coverage.append(frame.f_lineno)
    return traceit

In [None]:
import sys
def cgi_decode_traced(s):
    """Run cgi_decode(s) with line tracing enabled and return its result.

    The module-level `coverage` list is reset first, then `traceit` is
    installed with sys.settrace for the duration of the call.

    Args:
        s: The encoded string passed on to cgi_decode.

    Returns:
        Whatever cgi_decode(s) returns.

    Raises:
        Any exception raised by cgi_decode; tracing is switched off first.
    """
    global coverage
    coverage = []  # discard the result of any earlier run
    sys.settrace(traceit)  # tracing on
    try:
        return cgi_decode(s)
    finally:
        # Always switch tracing off, otherwise a failing cgi_decode would
        # leave the tracer installed for all code that runs afterwards.
        sys.settrace(None)

In [None]:
# Decode once under tracing, then show the recorded line numbers and the
# decoded text.
t = cgi_decode_traced("Hello%2c+world%21")
print(coverage)
print(t)

上面乱糟糟的输出一串数字，如何知道对应的是代码的哪一行?

背景：[inspect](https://docs.python.org/zh-cn/3/library/inspect.html) | [pygments](https://pygments.org/)

In [None]:
# Fetch the source text of cgi_decode as a single string.
import inspect
cgi_decode_code = inspect.getsource(cgi_decode)

In [None]:
# A plain print() of the source text is hard to read, so it is shown with
# syntax highlighting instead.
from fuzzingbook.fuzzingbook_utils import syntax_highlighting
syntax_highlighting.print_content(cgi_decode_code,'.py')

In [None]:
# Split the source text into lines. An empty entry is put in front so that
# list indices match the 1-based line numbers.
# NOTE: keep comments inside the function body rather than above the `def`;
# anything above it shifts the recorded line numbers relative to this listing
# (presumably because the tracer counts from the top of the cell while
# getsource() starts at the `def` line -- TODO confirm).
cgi_decode_lines = [""] + cgi_decode_code.splitlines()
coverage = set(coverage)
for lineno, source_line in enumerate(cgi_decode_lines[1:], start=1):
    # Lines that were never executed are prefixed with '# '.
    marker = '  ' if lineno in coverage else '# '
    print(marker, end="")
    print(f"{lineno} ", end="")
    syntax_highlighting.print_content(source_line, '.py')

## Coverage class
上面将 coverage 作为全局变量不好处理；怎么办呢？把它放在对象里面就好。

In [None]:
class Coverage(object):
    """Context manager that records the lines executed inside a `with` block.

    On entry the currently installed trace function (if any) is remembered
    and this object's `traceit` is installed instead. `traceit` forwards
    every event to the remembered function and additionally records
    (function name, line number) for each "line" event. On exit the
    remembered trace function -- or None if there was none -- is restored.
    """

    def __init__(self):
        self._trace = []
        # Initialised here so that traceit() also works on an instance that
        # has not been entered yet; __enter__ overwrites it.
        self.origin_trace_function = None

    def traceit(self, frame, event, arg):
        """Trace function: forward the event, then record "line" events.

        Returns this bound method so that tracing continues in the traced
        scope. The return value of the forwarded call is not used.
        """
        if self.origin_trace_function is not None:
            self.origin_trace_function(frame, event, arg)
        if event == "line":
            function_name = frame.f_code.co_name
            lineno = frame.f_lineno
            self._trace.append((function_name, lineno))
        return self.traceit

    def __enter__(self):
        """Remember the active trace function and start recording."""
        self.origin_trace_function = sys.gettrace()
        sys.settrace(self.traceit)
        return self

    def __exit__(self, exc_type, exc_value, tb):
        """Restore the trace function that was active before entry.

        Returns None, so an exception raised inside the block propagates.
        """
        sys.settrace(self.origin_trace_function)

    def trace(self):
        """Return the list of (function name, line number) pairs in execution order."""
        return self._trace

    def coverage(self):
        """Return the set of distinct (function name, line number) pairs."""
        return set(self.trace())

In [None]:
# Try out the Coverage class: record one cgi_decode call and print the
# set of (function name, line number) pairs that were executed.
with Coverage() as cov:
    cgi_decode("Hello%2c+world%21")
print(cov.coverage())

In [None]:
# Maximum coverage of cgi_decode: one input for each branch -- '+', a valid
# '%xx' escape, ordinary characters, and an invalid escape.
with Coverage() as cov_max:
    cgi_decode('+')
    cgi_decode('%20')
    cgi_decode('abc')
    try:
        cgi_decode('%?a')
    except ValueError:
        # Expected: this input exists only to reach the error branch.
        pass

In [None]:
# Compare the maximum coverage with the coverage of the `cov` object above:
# the result is the set of locations reached by cov_max but not by cov.
cov_max.coverage() - cov.coverage()

## 基于模糊测试的覆盖率

In [None]:
from fuzzingbook.fuzzingbook_utils import Fuzzer

# Record the coverage reached by a single random input.
with Coverage() as cov_fuzz:
    try:
        cgi_decode(Fuzzer.fuzzer())
    except Exception:
        # A random input may well be rejected; only the coverage matters
        # here. KeyboardInterrupt and SystemExit are deliberately not caught.
        pass
print(cov_fuzz.coverage())

In [None]:
# Locations reached by cov_max but not by the single fuzz input.
cov_max.coverage() - cov_fuzz.coverage()

In [None]:
def population_coverage(population, function):
    """Measure how coverage grows while `function` runs over `population`.

    Each input is run under its own Coverage recorder; exceptions raised by
    `function` are ignored so that the remaining inputs are still measured.

    Args:
        population: Iterable of inputs, each passed to `function` in turn.
        function: Callable taking one input.

    Returns:
        A tuple (all_coverage, cumulative_coverage): the set of all covered
        locations, and a list whose i-th entry is the number of distinct
        locations covered after the first i + 1 inputs.
    """
    cumulative_coverage = []
    all_coverage = set()

    for s in population:
        with Coverage() as cov:
            try:
                function(s)
            except Exception:
                # Failing inputs still contribute coverage. Catching
                # Exception rather than everything keeps Ctrl-C working
                # during long runs.
                pass
        all_coverage |= cov.coverage()
        cumulative_coverage.append(len(all_coverage))

    return all_coverage, cumulative_coverage

In [None]:
def hundred_inputs(trails):
    """Return a list of `trails` inputs, each produced by Fuzzer.fuzzer().

    Despite the name, the number of inputs is whatever `trails` says.
    """
    return [Fuzzer.fuzzer() for _ in range(trails)]

In [None]:
# One run: measure cumulative coverage over 100 random inputs.
all_coverage, cumulative_coverage = population_coverage(hundred_inputs(100), cgi_decode)

In [None]:
# NOTE: hundred_inputs(100) generates a fresh population here; it is not the
# population that was passed to population_coverage to produce
# cumulative_coverage.
print(hundred_inputs(100))
print(cumulative_coverage)

In [None]:
# Line chart of how coverage grows with the number of fuzz inputs.
import matplotlib.pyplot as plt 

plt.plot(cumulative_coverage)
plt.title("Coverage of cgi_decode() with random input")
plt.xlabel("# of input")
plt.ylabel("lines covered")

plt.show()

上面的仅仅跑一轮；多跑几轮，求平均值

In [None]:
runs = 100  # number of independent runs
trails = 100  # number of fuzz inputs per run

# sum_coverage[i] accumulates, over all runs, the cumulative coverage
# reached after i + 1 inputs.
sum_coverage = [0] * trails

for run in range(runs):
    all_coverage, cumulative_coverage = population_coverage(hundred_inputs(trails), cgi_decode)
    assert len(cumulative_coverage) == trails
    for i, covered in enumerate(cumulative_coverage):
        sum_coverage[i] += covered

# Turn the sums into per-position averages.
for i, total in enumerate(sum_coverage):
    sum_coverage[i] = total / runs

In [None]:
# Plot the coverage averaged over all runs (sum_coverage holds the averages
# at this point).
plt.plot(sum_coverage)
plt.title("Coverage of cgi_decode() with random input")
plt.xlabel("# of input")
plt.ylabel("lines covered")

plt.show()

## Find error
很明显的是，上面fuzz的过程中，如果出现error，直接pass,并没有对错误的处理方式，比如保存错误输入。

但是很明显这个代码是有问题的。

比如这样的输入：'82 202*&<1&($34\'"/\'.<5/!8"\'5:!4))%;'。它以 '%;' 结尾，'%' 后面只剩一个字符，cgi_decode 读取 s[i+2] 时下标越界，抛出的是 IndexError，而不是预期的 ValueError。

## 补充
上面使用的是语句覆盖；分支覆盖和它是有区别的。比如下面这样的语句：对于语句覆盖而言，只要条件为真，就执行并覆盖了所有语句。
但是对于分支覆盖而言，还需要测试条件为假的情况。实现时，可以在上面的 Coverage class 中，把记录语句改成记录分支。

In [None]:
# Schematic example only: CONDITION, do_a and something_else are placeholders.
# A single run with CONDITION true executes every statement below, but the
# "condition false" branch is only exercised by a second run.
if CONDITION:
    do_a()
something_else()