In [1]:
import setup # resolve path to 'src'
import numpy as np
import pandas as pd

from typing import Optional
from build_parse import *
from metrics import *

# Display every row of a DataFrame (None removes pandas' row-count truncation).
pd.set_option('display.max_rows', None)

In [2]:
# One Program object per coreutils program name, plus the parallel list of names
progs = [ CoreutilsProgram(name) for name in COREUTILS_PROG_NAMES ]
prognames = [ p.get_name() for p in progs ]

# Subset of programs selected for closer analysis, in this order
prognames_analyze = [ "stat", "nohup", "pinky", "csplit", "ginstall", "fmt", "df", "join", "expr", "seq", "unexpand", "tsort", "tee", "base64", "sum", "cksum", "wc" ]

# Resolve each selected name to the first Program carrying that name;
# names with no matching Program are silently left out.
progs_analyze = []
for progname in prognames_analyze:
    found = next((p for p in progs if p.get_name() == progname), None)
    if found is not None:
        progs_analyze.append(found)

def prog_from_progname(progname: str) -> Program:
    """Return the first entry of `progs` whose name equals `progname`.

    Parameters:
        progname: the program name to look up (compared against `get_name()`).

    Returns:
        The matching Program.

    Raises:
        ValueError: if no program in `progs` has that name. Previously the
            function fell off the end and returned None, which contradicted
            the declared return type and only surfaced later as an
            AttributeError in the caller.
    """
    for prog in progs:
        if prog.get_name() == progname:
            return prog
    raise ValueError("no program named {!r} in progs".format(progname))

# Build-option variants exercised for every program (all at optimization level 0)
debug_opts = BuildOptions(optimization=0, debug=True, strip=False)
standard_opts = BuildOptions(optimization=0, debug=False, strip=False)
strip_opts = BuildOptions(optimization=0, debug=False, strip=True)

opts_sets = (debug_opts, standard_opts, strip_opts)

# Parser callables, looked up by name
dwarf_parser = get_parser("dwarf")
ghidra_parser = get_parser("ghidra")

# Every program must report a valid build for each build-option variant
for prog in progs:
    for opts in opts_sets:
        assert prog.valid_build(opts)

In [3]:

# Filename scheme for cached ProgramInfo pickles: "<binary name>.<parser name>.pickle"
def mangle_proginfo_save_name(parsername: str, prog: Program, opts: BuildOptions) -> str:
    """Build the pickle filename for `prog` built with `opts` and parsed by `parsername`."""
    binary_name = prog.get_binary_name(opts)
    return f"{binary_name}.{parsername}.pickle"

def get_proginfo_save_path(parsername: str, prog: Program, opts: BuildOptions) -> Path:
    """Return the path under PICKLE_CACHE_DIR for the ProgramInfo pickle of this parser/program/options."""
    filename = mangle_proginfo_save_name(parsername, prog, opts)
    return PICKLE_CACHE_DIR.joinpath(filename)

def save_proginfo(proginfo: ProgramInfo, parsername: str, prog: Program, opts: BuildOptions):
    """Pass `proginfo` to save_pickle together with its cache path for this parser/program/options."""
    destination = get_proginfo_save_path(parsername, prog, opts)
    save_pickle(proginfo, destination)

def load_proginfo(parsername: str, prog: Program, opts: BuildOptions) -> ProgramInfo:
    """Return whatever load_pickle yields for the cache path of this parser/program/options."""
    source = get_proginfo_save_path(parsername, prog, opts)
    return load_pickle(source)

# Filename scheme for cached UnoptimizedProgramInfoCompare2 pickles: "<binary name>.cmp.pickle"
def mangle_cmp_save_name(prog: Program, opts: BuildOptions) -> str:
    """Build the pickle filename for the comparison object of `prog` built with `opts`."""
    binary_name = prog.get_binary_name(opts)
    return f"{binary_name}.cmp.pickle"

def get_cmp_save_path(prog: Program, opts: BuildOptions) -> Path:
    """Return the path under PICKLE_CACHE_DIR for the comparison pickle of `prog` built with `opts`."""
    filename = mangle_cmp_save_name(prog, opts)
    return PICKLE_CACHE_DIR.joinpath(filename)

def save_cmp(cmp: UnoptimizedProgramInfoCompare2, prog: Program, opts: BuildOptions):
    """Pass the comparison object to save_pickle together with its cache path for `prog`/`opts`."""
    destination = get_cmp_save_path(prog, opts)
    save_pickle(cmp, destination)

def load_cmp(prog: Program, opts: BuildOptions) -> UnoptimizedProgramInfoCompare2:
    """Return whatever load_pickle yields for the comparison cache path of `prog`/`opts`."""
    source = get_cmp_save_path(prog, opts)
    return load_pickle(source)

In [4]:
# DWARF:  parsed only from the debug build (debug_opts)
# Ghidra: parsed from every build-option variation in opts_sets
# Results are cached as pickles under PICKLE_CACHE_DIR, named by the 'mangle' helpers

reparse = False # re-parse even when a cached pickle already exists?
skip_parsing = True # skip the parsing loop entirely; set to True once everything is parsed & cached

class ParseException(Exception):
    """Exception type reserved for parsing failures."""

def parse(parser: Callable, prog: "Program", opts: "BuildOptions") -> "Optional[ProgramInfo]":
    """Run `parser` on the binary of `prog` built with `opts`, best-effort.

    Parameters:
        parser: callable invoked with the binary path.
        prog: program whose `get_binary_path(opts)` supplies that path.
        opts: build options selecting which binary to parse.

    Returns:
        The parser's result, or None if resolving the path or parsing raised
        an ordinary exception.

    Only `Exception` subclasses are converted to None. KeyboardInterrupt and
    SystemExit propagate, so a long parsing run can still be interrupted
    instead of silently recording the interrupted program as a failure.
    """
    try:
        return parser(prog.get_binary_path(opts))
    except Exception:
        return None

# (parser name, program name, build options) for every parse that returned None
failed = []
if not skip_parsing:
    for prog in progs:
        # DWARF ground truth: parsed from the debug build only
        dwarf_debug_savepath = get_proginfo_save_path("dwarf", prog, debug_opts)
        if reparse or not dwarf_debug_savepath.exists():
            dwarf_debug = parse(dwarf_parser, prog, debug_opts)
            if dwarf_debug is None:
                failed.append(("dwarf", prog.get_name(), debug_opts))
            else:
                save_pickle(dwarf_debug, dwarf_debug_savepath)

        # Ghidra: parsed from every build-option variation
        for opts in opts_sets:
            ghidra_parse_savepath = get_proginfo_save_path("ghidra", prog, opts)
            # Check the Ghidra pickle for *these* opts. The previous code tested
            # dwarf_debug_savepath here, so Ghidra parsing was skipped whenever the
            # DWARF pickle already existed.
            if reparse or not ghidra_parse_savepath.exists():
                ghidra_parse = parse(ghidra_parser, prog, opts)
                if ghidra_parse is None:
                    failed.append(("ghidra", prog.get_name(), opts))
                else:
                    save_pickle(ghidra_parse, ghidra_parse_savepath)

In [5]:
# Parse failures recorded by the parsing loop (stays empty when parsing is skipped)
print(failed)

# Every Ghidra parse result must be present in the pickle cache;
# the assertion message names the first missing file.
for prog in progs:
    for opts in opts_sets:
        ghidra_savepath = get_proginfo_save_path("ghidra", prog, opts)
        assert ghidra_savepath.exists(), "missing cached Ghidra parse: {}".format(ghidra_savepath)

[]


In [6]:
# For each program & build options combination, compute & store the comparison object

recompare = False # recompute even when a cached comparison pickle already exists?
skip_comparisons = True # skip the comparison loop entirely; set to True once everything is cached

# (program name, build options) for every comparison that raised
failed = []
if not skip_comparisons:
    for prog in progs:
        # load the DWARF ground-truth ProgramInfo (debug build)
        dwarf_proginfo = load_proginfo("dwarf", prog, debug_opts)
        assert dwarf_proginfo is not None

        # For each set of compilation options, load the Ghidra decompiler ProgramInfo,
        # then compute & store the comparison object. All of opts_sets is covered
        # (previously only strip_opts) because the check that follows asserts a cached
        # comparison for every entry of opts_sets.
        for opts in opts_sets:
            cmp_save_path = get_cmp_save_path(prog, opts)
            if not recompare and cmp_save_path.exists():
                continue

            ghidra_proginfo = load_proginfo("ghidra", prog, opts)
            assert ghidra_proginfo is not None
            try:
                cmp = compare2(dwarf_proginfo, ghidra_proginfo)
                save_pickle(cmp, cmp_save_path)
            except Exception:
                # Record the failure and carry on; KeyboardInterrupt/SystemExit still propagate.
                failed.append((prog.get_name(), opts))

In [7]:
# Comparison failures recorded by the comparison loop (stays empty when comparisons are skipped)
print(failed)

# Every program/build-options pair must have a cached comparison object;
# the assertion message names the first missing file.
for prog in progs:
    for opts in opts_sets:
        cmp_savepath = get_cmp_save_path(prog, opts)
        assert cmp_savepath.exists(), "missing cached comparison: {}".format(cmp_savepath)

[]


In [8]:
# Helpers for naming, locating and loading the per-build-options metrics tables (CSV files)

def mangle_table_save_name(tablename: str, opts: BuildOptions) -> str:
    """Return the CSV filename for a table: `tablename`, then `suffix(opts)`, then ".csv"."""
    opts_suffix = suffix(opts)
    return f"{tablename}{opts_suffix}.csv"

def mangle_table_display_name(tablename: str, opts: BuildOptions) -> str:
    """Return a human-readable table title.

    The title is `tablename` followed by a parenthesised summary of the build
    options, e.g. "NAME (optimization=0, stripped=False, debug=True)".
    """
    opts_summary = "(optimization={}, stripped={}, debug={})".format(
        opts.optimization, opts.strip, opts.debug
    )
    return "{} {}".format(tablename, opts_summary)

def get_table_save_path(tablename: str, opts: BuildOptions) -> Path:
    """Return the path under DATA_DIR of the CSV file for `tablename` with `opts`."""
    filename = mangle_table_save_name(tablename, opts)
    return DATA_DIR.joinpath(filename)

def load_table(tablename: str, opts: BuildOptions) -> pd.DataFrame:
    """Read the saved CSV for `tablename`/`opts`, using its first column as the index."""
    csv_path = get_table_save_path(tablename, opts)
    return pd.read_csv(csv_path, index_col=0)

def load_table_filter_analyzed(tablename: str, opts: BuildOptions) -> pd.DataFrame:
    """Load a saved table and keep only the rows whose index label is in `prognames_analyze`."""
    full_table = load_table(tablename, opts)
    return full_table.filter(items=prognames_analyze, axis=0)

In [9]:
metrics_groups = make_metrics()

# Metatype families used to select the per-metatype metrics groups
primitive_metatypes = [MetaType.INT, MetaType.FLOAT, MetaType.POINTER]
complex_metatypes = [MetaType.ARRAY, MetaType.STRUCT, MetaType.UNION]

# Groups picked out of make_metrics() by position. The per-metatype groups
# sit at 3..8 (varnodes) and 10..12 (decomposed varnodes, primitive metatypes).
bytes_group, functions_group, varnodes_group = (
    metrics_groups[0],
    metrics_groups[1],
    metrics_groups[2],
)
decomposed_varnodes_group = metrics_groups[9]
array_comparisons_group = metrics_groups[13]

def varnodes_group_metatype(metatype: int) -> MetricsGroup:
    """Return the varnodes metrics group for a single metatype.

    Metatypes are numbered in the order primitive_metatypes + complex_metatypes,
    starting at position 3 of `metrics_groups`. An unknown metatype raises KeyError.
    """
    ordered_metatypes = primitive_metatypes + complex_metatypes
    position_of = { meta: pos for pos, meta in enumerate(ordered_metatypes, start=3) }
    return metrics_groups[position_of[metatype]]

varnodes_groups_metatypes = list(map(varnodes_group_metatype, primitive_metatypes + complex_metatypes))

def decomposed_varnodes_group_metatype(metatype: int) -> MetricsGroup:
    """Return the decomposed-varnodes metrics group for a single primitive metatype.

    Primitive metatypes are numbered in list order starting at position 10 of
    `metrics_groups`. A metatype outside primitive_metatypes raises KeyError.
    """
    position_of = { meta: pos for pos, meta in enumerate(primitive_metatypes, start=10) }
    return metrics_groups[position_of[metatype]]

decomposed_varnodes_groups_metatypes = list(map(decomposed_varnodes_group_metatype, primitive_metatypes))

# Overall group first, then its per-metatype breakdowns
high_varnodes_groups = [varnodes_group, *varnodes_groups_metatypes]
decomposed_varnodes_groups = [decomposed_varnodes_group, *decomposed_varnodes_groups_metatypes]

In [10]:
recompute = True # rebuild a table even when its CSV already exists?
skip_compute_metrics = False # skip this whole step?

# NOTE(review): only debug_opts and the per-metatype decomposed groups are computed here,
# while later cells load tables for other groups/options — presumably those CSVs come
# from earlier runs of this cell with different loop targets; confirm.
if not skip_compute_metrics:
    for opts in (debug_opts,):
        cmps = [ load_cmp(program, opts) for program in progs ]
        for grp in decomposed_varnodes_groups_metatypes:
            save_path = get_table_save_path(grp.get_name(), opts)
            tablename = mangle_table_display_name(grp.get_display_name(), opts)
            print(tablename)
            if not recompute and save_path.exists():
                continue
            df = compute_comparisons_metrics_dataframe(prognames, cmps, grp.get_metrics())
            df.to_csv(save_path)


VARNODES (decomposed) (metatype = INT) (optimization=0, stripped=False, debug=True)
VARNODES (decomposed) (metatype = FLOAT) (optimization=0, stripped=False, debug=True)
VARNODES (decomposed) (metatype = POINTER) (optimization=0, stripped=False, debug=True)


In [11]:
def get_table(grp: MetricsGroup, opts: BuildOptions, analyzed_only: bool = True) -> pd.DataFrame:
    """Load the saved table of a metrics group for the given build options.

    With `analyzed_only` (the default) only the rows whose index label is in
    `prognames_analyze` are kept; otherwise the full table is returned.
    """
    table = load_table(grp.get_name(), opts)
    if analyzed_only:
        return table.filter(items=prognames_analyze, axis=0)
    return table

def display_analyzed_tables(
    metrics_groups: List[MetricsGroup],
    opts_sets: List[BuildOptions],
    analyzed_only: bool = True
):
    """Print a dashed header and display the saved table for every (group, opts) pair.

    Groups form the outer loop and build options the inner loop; `analyzed_only`
    is forwarded to get_table.
    """
    rule = "-" * 10
    for grp in metrics_groups:
        for opts in opts_sets:
            title = mangle_table_display_name(grp.get_display_name(), opts)
            table = get_table(grp, opts, analyzed_only=analyzed_only)

            print("{} {} {}".format(rule, title, rule))
            display(table)

In [12]:
# Column positions in the varnodes table: 0 = ground-truth count,
# 1..5 = counts per match level (NO_MATCH, OVERLAP, SUBSET, ALIGNED, MATCH),
# 6 = average comparison score.
for grp in (varnodes_group,):
    for opts in (strip_opts,):
        df = get_table(grp, opts)

        truth_counts = df.iloc[:, 0]
        partial_counts = df.iloc[:, 2:6].sum(axis=1) # everything except NO_MATCH
        exact_counts = df.iloc[:, 5]

        df["Varnodes fraction partially recovered"] = partial_counts / truth_counts
        df["Varnodes fraction exactly recovered"] = exact_counts / truth_counts
        display(df)

        # column-wise mean over programs of the score and the two derived fractions
        tmp = df.iloc[:, 6:].mean(axis=0)
        display(tmp)

Unnamed: 0,Ground truth varnodes,Varnodes matched @ level=NO_MATCH,Varnodes matched @ level=OVERLAP,Varnodes matched @ level=SUBSET,Varnodes matched @ level=ALIGNED,Varnodes matched @ level=MATCH,"Varnode average comparison score [0,1]",Varnodes fraction partially recovered,Varnodes fraction exactly recovered
stat,608,5,21,14,321,247,0.822368,0.991776,0.40625
nohup,162,1,7,4,105,45,0.787037,0.993827,0.277778
pinky,182,1,11,2,110,58,0.792582,0.994505,0.318681
csplit,1003,8,41,15,544,395,0.818295,0.992024,0.393819
fmt,186,1,6,2,110,67,0.817204,0.994624,0.360215
df,646,1,20,11,287,327,0.85565,0.998452,0.506192
join,260,1,8,3,151,97,0.822115,0.996154,0.373077
expr,932,8,37,25,499,363,0.814378,0.991416,0.389485
seq,279,1,20,6,128,124,0.817204,0.996416,0.444444
unexpand,158,1,6,2,92,57,0.813291,0.993671,0.360759


Varnode average comparison score [0,1]    0.808120
Varnodes fraction partially recovered     0.985349
Varnodes fraction exactly recovered       0.376531
dtype: float64

In [16]:
varnode_compare_levels = list(VarnodeCompareLevel.range())
varnode_compare_level_labels = [ VarnodeCompareLevel.to_string(level) for level in varnode_compare_levels ]

metatypes = primitive_metatypes + complex_metatypes
metatype_labels = [ MetaType.repr(metatype) for metatype in metatypes ]


def _match_level_ratios(group_for_metatype, table_metatypes, opts) -> pd.DataFrame:
    """Build a (metatype x match level) table of matched-varnode ratios.

    Parameters:
        group_for_metatype: callable mapping a metatype to its MetricsGroup.
        table_metatypes: metatypes to include, one output row each.
        opts: build options selecting which saved tables to load.

    Returns:
        DataFrame indexed by metatype label with one column per match level.
        Each cell is the count at that level summed over the analyzed programs,
        divided by the summed ground-truth count for the metatype. A metatype
        with zero ground-truth varnodes yields NaN (0/0).
    """
    # Column 0 holds the ground-truth count; the match-level counts follow it.
    level_columns = slice(1, 1 + len(varnode_compare_level_labels))

    rows = []
    for metatype in table_metatypes:
        table = get_table(group_for_metatype(metatype), opts)
        ground_truth_total = table.iloc[:, 0].sum()
        counts_by_level = table.iloc[:, level_columns].sum(axis=0)
        counts_by_level.index = varnode_compare_level_labels
        rows.append(counts_by_level / ground_truth_total)

    return pd.DataFrame(
        rows,
        index=[ MetaType.repr(metatype) for metatype in table_metatypes ],
        columns=varnode_compare_level_labels
    )


for opts in opts_sets:
    # high-level varnodes: every metatype
    high_tablename = "metatype_match_levels_ratios"
    high_df = _match_level_ratios(varnodes_group_metatype, metatypes, opts)
    print("{}{}".format(high_tablename, suffix(opts)))
    display(high_df)

    # decomposed varnodes: primitive metatypes only
    decomposed_tablename = "metatype_match_levels_ratios_decomposed"
    decomposed_df = _match_level_ratios(decomposed_varnodes_group_metatype, primitive_metatypes, opts)
    print("{}{}".format(decomposed_tablename, suffix(opts)))
    display(decomposed_df)

metatype_match_levels_ratios_O0_debug


Unnamed: 0,NO_MATCH,OVERLAP,SUBSET,ALIGNED,MATCH
INT,0.007421,0.00053,0.0,0.00318,0.988868
FLOAT,0.0,0.0,0.0,0.0,1.0
POINTER,0.009042,0.0,0.0,0.0,0.990958
ARRAY,0.142857,0.002421,0.009685,0.0,0.845036
STRUCT,0.0,0.002959,0.0,0.0,0.997041
UNION,,,,,


metatype_match_levels_ratios_decomposed_O0_debug


Unnamed: 0,NO_MATCH,OVERLAP,SUBSET,ALIGNED,MATCH
INT,0.044064,9.7e-05,0.0,0.000106,0.955732
FLOAT,0.0,0.0,0.0,0.0,1.0
POINTER,0.002075,0.0,0.0,0.0,0.997925


metatype_match_levels_ratios_O0


Unnamed: 0,NO_MATCH,OVERLAP,SUBSET,ALIGNED,MATCH
INT,0.007951,0.000795,0.0,0.571959,0.419295
FLOAT,0.0,0.485714,0.0,0.371429,0.142857
POINTER,0.016275,0.000603,0.0,0.6088,0.374322
ARRAY,0.22276,0.20339,0.152542,0.01937,0.401937
STRUCT,0.0,0.408284,0.14497,0.349112,0.097633
UNION,,,,,


metatype_match_levels_ratios_decomposed_O0


Unnamed: 0,NO_MATCH,OVERLAP,SUBSET,ALIGNED,MATCH
INT,0.0909,0.262623,0.0,0.545198,0.101279
FLOAT,0.0,0.651163,0.0,0.232558,0.116279
POINTER,0.007194,0.442031,0.0,0.40938,0.141395


metatype_match_levels_ratios_O0_strip


Unnamed: 0,NO_MATCH,OVERLAP,SUBSET,ALIGNED,MATCH
INT,0.007951,0.000795,0.0,0.571959,0.419295
FLOAT,0.0,0.485714,0.0,0.371429,0.142857
POINTER,0.016275,0.000603,0.0,0.6088,0.374322
ARRAY,0.22276,0.20339,0.152542,0.01937,0.401937
STRUCT,0.0,0.408284,0.14497,0.349112,0.097633
UNION,,,,,


metatype_match_levels_ratios_decomposed_O0_strip


Unnamed: 0,NO_MATCH,OVERLAP,SUBSET,ALIGNED,MATCH
INT,0.279891,0.164683,0.0,0.455396,0.10003
FLOAT,0.093023,0.55814,0.0,0.232558,0.116279
POINTER,0.355562,0.082042,0.0,0.414084,0.148312


Unnamed: 0,Ground truth varnodes (decomposed) (metatype=POINTER),Varnodes matched @ level=NO_MATCH (decomposed) (metatype=POINTER),Varnodes matched @ level=OVERLAP (decomposed) (metatype=POINTER),Varnodes matched @ level=SUBSET (decomposed) (metatype=POINTER),Varnodes matched @ level=ALIGNED (decomposed) (metatype=POINTER),Varnodes matched @ level=MATCH (decomposed) (metatype=POINTER),"Varnode average compare score [0,1] (decomposed) (metatype=POINTER)"
stat,286,34,6,0,103,143,0.77535
nohup,106,18,4,0,52,32,0.679245
pinky,120,21,4,0,60,35,0.675
csplit,1424,44,266,0,1024,90,0.649228
fmt,2126,2031,4,0,55,36,0.036806
df,424,71,6,0,140,207,0.739387
join,149,34,4,0,65,46,0.642617
expr,1407,28,266,0,1022,91,0.656716
seq,193,24,4,0,61,104,0.781088
unexpand,104,25,4,0,47,28,0.617788


In [14]:
# Show the decomposed-varnodes tables (overall + per primitive metatype)
# for the stripped and the debug builds, analyzed programs only.
analyzed_opts_sets = (strip_opts, debug_opts)

display_analyzed_tables(
    metrics_groups=decomposed_varnodes_groups,
    opts_sets=analyzed_opts_sets,
    analyzed_only=True,
)

---------- VARNODES (decomposed) (optimization=0, stripped=True, debug=False) ----------


Unnamed: 0,Ground truth varnodes (decomposed),Varnodes matched @ level=NO_MATCH (decomposed),Varnodes matched @ level=OVERLAP (decomposed),Varnodes matched @ level=SUBSET (decomposed),Varnodes matched @ level=ALIGNED (decomposed),Varnodes matched @ level=MATCH (decomposed),"Varnode average comparison score [0,1] (decomposed)"
stat,3141,225,310,0,785,1821,0.791866
nohup,874,47,125,0,385,317,0.728833
pinky,3335,47,137,0,2518,633,0.766342
csplit,5532,455,584,0,2129,2364,0.742363
fmt,15915,47,10153,0,5382,333,0.43404
df,3581,49,342,0,2002,1188,0.774923
join,1004,47,166,0,424,367,0.723606
expr,5727,601,552,0,2275,2299,0.723459
seq,1136,47,191,0,458,440,0.731734
unexpand,874,47,139,0,364,324,0.722826


---------- VARNODES (decomposed) (optimization=0, stripped=False, debug=True) ----------


Unnamed: 0,Ground truth varnodes (decomposed),Varnodes matched @ level=NO_MATCH (decomposed),Varnodes matched @ level=OVERLAP (decomposed),Varnodes matched @ level=SUBSET (decomposed),Varnodes matched @ level=ALIGNED (decomposed),Varnodes matched @ level=MATCH (decomposed),"Varnode average comparison score [0,1] (decomposed)"
stat,3141,0,0,0,0,3141,1.0
nohup,874,0,0,0,0,874,1.0
pinky,3335,224,0,0,0,3111,0.932834
csplit,5532,295,2,0,6,5229,0.946132
fmt,15915,4000,0,0,0,11915,0.748665
df,3581,0,7,0,0,3574,0.998534
join,1004,0,0,0,0,1004,1.0
expr,5727,302,2,0,6,5417,0.946743
seq,1136,0,0,0,0,1136,1.0
unexpand,874,0,0,0,0,874,1.0


---------- VARNODES (decomposed) (metatype = INT) (optimization=0, stripped=True, debug=False) ----------


Unnamed: 0,Ground truth varnodes (decomposed) (metatype=INT),Varnodes matched @ level=NO_MATCH (decomposed) (metatype=INT),Varnodes matched @ level=OVERLAP (decomposed) (metatype=INT),Varnodes matched @ level=SUBSET (decomposed) (metatype=INT),Varnodes matched @ level=ALIGNED (decomposed) (metatype=INT),Varnodes matched @ level=MATCH (decomposed) (metatype=INT),"Varnode average compare score [0,1] (decomposed) (metatype=INT)"
stat,2845,707,174,0,312,1652,0.678207
nohup,767,329,69,0,82,287,0.476858
pinky,3214,404,76,0,2152,582,0.689172
csplit,4086,774,181,0,863,2268,0.724547
fmt,13788,13343,69,0,77,299,0.027125
df,3127,437,170,0,1620,900,0.689958
join,854,347,71,0,113,323,0.498244
expr,4293,901,192,0,997,2203,0.698521
seq,921,372,96,0,112,341,0.487514
unexpand,769,335,69,0,66,299,0.475618


---------- VARNODES (decomposed) (metatype = INT) (optimization=0, stripped=False, debug=True) ----------


Unnamed: 0,Ground truth varnodes (decomposed) (metatype=INT),Varnodes matched @ level=NO_MATCH (decomposed) (metatype=INT),Varnodes matched @ level=OVERLAP (decomposed) (metatype=INT),Varnodes matched @ level=SUBSET (decomposed) (metatype=INT),Varnodes matched @ level=ALIGNED (decomposed) (metatype=INT),Varnodes matched @ level=MATCH (decomposed) (metatype=INT),"Varnode average compare score [0,1] (decomposed) (metatype=INT)"
stat,2845,0,0,0,0,2845,1.0
nohup,767,0,0,0,0,767,1.0
pinky,3214,224,0,0,0,2990,0.930305
csplit,4086,295,2,0,6,3783,0.927068
fmt,13788,4000,0,0,0,9788,0.709893
df,3127,0,7,0,0,3120,0.998321
join,854,0,0,0,0,854,1.0
expr,4293,302,2,0,6,3983,0.928954
seq,921,0,0,0,0,921,1.0
unexpand,769,0,0,0,0,769,1.0


---------- VARNODES (decomposed) (metatype = FLOAT) (optimization=0, stripped=True, debug=False) ----------


Unnamed: 0,Ground truth varnodes (decomposed) (metatype=FLOAT),Varnodes matched @ level=NO_MATCH (decomposed) (metatype=FLOAT),Varnodes matched @ level=OVERLAP (decomposed) (metatype=FLOAT),Varnodes matched @ level=SUBSET (decomposed) (metatype=FLOAT),Varnodes matched @ level=ALIGNED (decomposed) (metatype=FLOAT),Varnodes matched @ level=MATCH (decomposed) (metatype=FLOAT),"Varnode average compare score [0,1] (decomposed) (metatype=FLOAT)"
stat,2,0,1,0,1,0,0.5
nohup,0,0,0,0,0,0,
pinky,0,0,0,0,0,0,
csplit,0,0,0,0,0,0,
fmt,0,0,0,0,0,0,
df,17,4,4,0,4,5,0.529412
join,0,0,0,0,0,0,
expr,0,0,0,0,0,0,
seq,14,0,13,0,1,0,0.285714
unexpand,0,0,0,0,0,0,


---------- VARNODES (decomposed) (metatype = FLOAT) (optimization=0, stripped=False, debug=True) ----------


Unnamed: 0,Ground truth varnodes (decomposed) (metatype=FLOAT),Varnodes matched @ level=NO_MATCH (decomposed) (metatype=FLOAT),Varnodes matched @ level=OVERLAP (decomposed) (metatype=FLOAT),Varnodes matched @ level=SUBSET (decomposed) (metatype=FLOAT),Varnodes matched @ level=ALIGNED (decomposed) (metatype=FLOAT),Varnodes matched @ level=MATCH (decomposed) (metatype=FLOAT),"Varnode average compare score [0,1] (decomposed) (metatype=FLOAT)"
stat,2,0,0,0,0,2,1.0
nohup,0,0,0,0,0,0,
pinky,0,0,0,0,0,0,
csplit,0,0,0,0,0,0,
fmt,0,0,0,0,0,0,
df,17,0,0,0,0,17,1.0
join,0,0,0,0,0,0,
expr,0,0,0,0,0,0,
seq,14,0,0,0,0,14,1.0
unexpand,0,0,0,0,0,0,


---------- VARNODES (decomposed) (metatype = POINTER) (optimization=0, stripped=True, debug=False) ----------


Unnamed: 0,Ground truth varnodes (decomposed) (metatype=POINTER),Varnodes matched @ level=NO_MATCH (decomposed) (metatype=POINTER),Varnodes matched @ level=OVERLAP (decomposed) (metatype=POINTER),Varnodes matched @ level=SUBSET (decomposed) (metatype=POINTER),Varnodes matched @ level=ALIGNED (decomposed) (metatype=POINTER),Varnodes matched @ level=MATCH (decomposed) (metatype=POINTER),"Varnode average compare score [0,1] (decomposed) (metatype=POINTER)"
stat,286,34,6,0,103,143,0.77535
nohup,106,18,4,0,52,32,0.679245
pinky,120,21,4,0,60,35,0.675
csplit,1424,44,266,0,1024,90,0.649228
fmt,2126,2031,4,0,55,36,0.036806
df,424,71,6,0,140,207,0.739387
join,149,34,4,0,65,46,0.642617
expr,1407,28,266,0,1022,91,0.656716
seq,193,24,4,0,61,104,0.781088
unexpand,104,25,4,0,47,28,0.617788


---------- VARNODES (decomposed) (metatype = POINTER) (optimization=0, stripped=False, debug=True) ----------


Unnamed: 0,Ground truth varnodes (decomposed) (metatype=POINTER),Varnodes matched @ level=NO_MATCH (decomposed) (metatype=POINTER),Varnodes matched @ level=OVERLAP (decomposed) (metatype=POINTER),Varnodes matched @ level=SUBSET (decomposed) (metatype=POINTER),Varnodes matched @ level=ALIGNED (decomposed) (metatype=POINTER),Varnodes matched @ level=MATCH (decomposed) (metatype=POINTER),"Varnode average compare score [0,1] (decomposed) (metatype=POINTER)"
stat,286,0,0,0,0,286,1.0
nohup,106,0,0,0,0,106,1.0
pinky,120,0,0,0,0,120,1.0
csplit,1424,0,0,0,0,1424,1.0
fmt,2126,0,0,0,0,2126,1.0
df,424,0,0,0,0,424,1.0
join,149,0,0,0,0,149,1.0
expr,1407,0,0,0,0,1407,1.0
seq,193,0,0,0,0,193,1.0
unexpand,104,0,0,0,0,104,1.0


In [15]:
# Inspect the debug-build comparison for cksum: total sizes of the ground-truth
# and missed varnodes, the records matched at OVERLAP level, and the parent
# function of every missed varnode.
cmp = load_cmp(prog_from_progname("cksum"), debug_opts)
truth = sum(varnode.get_size() for varnode in varnodes_truth(cmp))
missed = sum(varnode.get_size() for varnode in varnodes_missed(cmp))
overlapped = varnode_compare_records_matched_at_level(cmp, VarnodeCompareLevel.OVERLAP)
for varnode in varnodes_missed(cmp):
    parent_function = varnode.get_var().get_parent_function()
    print(parent_function.get_name())

cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul