In [153]:
import glob
import re
import statistics
import numpy as np

def extreact_trace_bits_from_dir(dir_name):
    """Parse every trace log matched by a glob pattern into nested dicts.

    Args:
        dir_name: Glob pattern passed to ``glob.glob`` (e.g. ``"logs/*"``).
            Paths are split on ``"/"``. For each matched file, the text after
            the last ``_`` in the part of the base name before the first ``.``
            must be an integer; files are processed in ascending order of it.

    Returns:
        dict: Maps the 1-based rank of each file in that order to a
        ``{position: count}`` dict read from its
        ``trace_bits[<position>]: <count>`` lines. Lines containing ``count``
        are ignored, and only the first entry on a line is recorded.

    Raises:
        ValueError: If a matched file name does not yield an integer.
    """

    def numeric_suffix(path):
        # "some/dir/name_12.log" -> 12
        base_name = path.split("/")[-1]
        stem = base_name.split(".")[0]
        return int(stem.split("_")[-1])

    entry_re = re.compile(r"trace_bits\[(\d+)\]: (\d+)")
    traces = {}
    ordered_paths = sorted(glob.glob(dir_name), key=numeric_suffix)
    for rank, path in enumerate(ordered_paths, start=1):
        with open(path, "r") as handle:
            bits = traces[rank] = {}
            for line in handle:
                # Summary lines mention "count"; other lines carry no entry.
                if "count" in line or "trace_bits" not in line:
                    continue
                hit = entry_re.search(line)
                if hit is not None:
                    bits[int(hit.group(1))] = int(hit.group(2))
    return traces

def compare_trace_bits(dict1, dict2):
    """Return the fraction of keys on which two trace dictionaries agree.

    A key counts as agreeing when it is present in both dictionaries with
    equal values. The denominator is the number of distinct keys found in
    either dictionary.

    Args:
        dict1: First ``{position: count}`` mapping.
        dict2: Second ``{position: count}`` mapping.

    Returns:
        float: Agreeing keys divided by the size of the key union, in
        ``[0.0, 1.0]``. Two empty dictionaries yield ``1.0``, the same score
        any non-empty dictionary gets when compared with itself.
    """
    all_keys = dict1.keys() | dict2.keys()
    if not all_keys:
        # Nothing to disagree on; also avoids dividing by zero.
        return 1.0

    # Only keys present on both sides can possibly agree.
    shared_keys = dict1.keys() & dict2.keys()
    agreeing = sum(1 for key in shared_keys if dict1[key] == dict2[key])
    return agreeing / len(all_keys)

def get_result(data):
    """Summarise a collection of numbers with five descriptive statistics.

    Args:
        data: Non-empty sequence of numeric values.

    Returns:
        tuple: ``(maximum, minimum, mean, median, mode)`` of ``data``.
    """
    largest, smallest = max(data), min(data)
    # Central-tendency measures come from the standard ``statistics`` module.
    average = statistics.mean(data)
    middle = statistics.median(data)
    most_common = statistics.mode(data)
    return largest, smallest, average, middle, most_common

def gen_result_lab_1(dir_names, thresholds, log_file):
    """Log "family size" statistics of trace similarity for several directories.

    For every glob pattern in ``dir_names`` the traces are loaded with
    ``extreact_trace_bits_from_dir``. For each trace and each threshold, the
    family size is the number of traces from the same pattern (the trace
    itself included) whose ``compare_trace_bits`` score against it is strictly
    greater than the threshold. The max/min/mean/median/mode of the family
    sizes are logged per pattern and threshold, followed by the average of
    each statistic over all patterns.

    Args:
        dir_names: Iterable of glob patterns, one per set of trace logs.
        thresholds: Re-iterable collection of hashable similarity thresholds.
        log_file: Writable text stream used as ``print(..., file=log_file)``.

    Returns:
        None. All results are written to ``log_file``.
    """
    # One list of per-directory statistic tuples for each threshold.
    results = {threshold: [] for threshold in thresholds}

    print("=====================================", file=log_file)
    print("最大值\t最小值\t平均值\t中位数\t众数", file=log_file)
    for dir_name in dir_names:
        res = extreact_trace_bits_from_dir(dir_name)
        print(f"{dir_name}:", file=log_file)

        # Family sizes per threshold, kept by threshold position so that
        # repeated threshold values are still reported once per occurrence.
        family_sizes = [[] for _ in thresholds]
        if not family_sizes:
            continue

        trace_ids = range(1, len(res) + 1)
        for i in trace_ids:
            # A pair's score does not depend on the threshold, so each row of
            # scores is computed once and reused for every threshold.
            scores = [compare_trace_bits(res[i], res[j]) for j in trace_ids]
            for sizes, threshold in zip(family_sizes, thresholds):
                sizes.append(sum(1 for score in scores if score > threshold))

        for sizes, threshold in zip(family_sizes, thresholds):
            maximum, minimum, mean, median, mode_value = get_result(sizes)
            results[threshold].append((maximum, minimum, mean, median, mode_value))
            print(f"{maximum}\t{minimum}\t{mean}\t{median}\t{mode_value}\tthreshold: {threshold}", file=log_file)

    print("最终结果: ", file=log_file)
    for threshold in thresholds:
        # Average each statistic over all directories.
        maximum, minimum, mean, median, mode_value = np.mean(results[threshold], axis=0)
        print(f"{maximum}\t{minimum}\t{mean}\t{median}\t{mode_value}\tthreshold: {threshold}", file=log_file)


In [156]:
# Inputs prepared for gen_result_lab_1; the single-directory run below does not read them.
dir_names = ["data/afl-spy-logs-user-1/*", "data/afl-spy-logs-user-2/*", "data/afl-spy-logs-user-3/*"]
thresholds = [0.9, 0.95, 0.99, 0.995, 0.999, 0.9995, 0.9999]

# Single-directory run with one fixed threshold.
threshold = 0.5
dir_name = "data/trace_bits_data/aflnet_user/afl-spy-logs-1/*"
# Alternative input kept from the original cell:
# dir_name = "/home/czx/afl-workspace/aflnet/spy-test-http/afl-spy-logs-system-4/*"
res = extreact_trace_bits_from_dir(dir_name)
print(f"{dir_name}:")

# For each trace, count the traces (itself included) whose similarity score
# against it is strictly greater than the threshold.
trace_ids = range(1, len(res) + 1)
family_count = dict.fromkeys(trace_ids, 0)
for i in trace_ids:
    for j in trace_ids:
        if compare_trace_bits(res[i], res[j]) > threshold:
            family_count[i] += 1

family_sizes = list(family_count.values())
maximum, minimum, mean, median, mode_value = get_result(family_sizes)
print(f"{maximum}\t{minimum}\t{mean}\t{median}\t{mode_value}\tthreshold: {threshold}")

data/trace_bits_data/aflnet_user/afl-spy-logs-1/*:
100	100	100	100.0	100	threshold: 0.5


In [155]:
# Display the dictionary currently bound to `res`.
res

{}