In [47]:
import setup # resolve path to 'src'
import numpy as np
import pandas as pd

from typing import Optional
from build_parse import *
from metrics import *

# Show every row when a DataFrame is displayed (None removes the row limit).
pd.options.display.max_rows = None

In [48]:
# One CoreutilsProgram per known program name, plus the names in the same order.
progs = [ CoreutilsProgram(name) for name in COREUTILS_PROG_NAMES ]
prognames = [ candidate.get_name() for candidate in progs ]

# Names of the programs that the analysis tables are restricted to.
prognames_analyze = [ "stat", "nohup", "pinky", "csplit", "ginstall", "fmt", "df", "join", "expr", "seq", "unexpand", "tsort", "tee", "base64", "sum", "cksum", "wc" ]

# Resolve each requested name to the first program carrying that name.
# Names with no matching program are skipped silently.
progs_analyze = []
for progname in prognames_analyze:
    first_match = next(
        (candidate for candidate in progs if progname == candidate.get_name()),
        None
    )
    if first_match is not None:
        progs_analyze.append(first_match)

def prog_from_progname(progname: str) -> Optional[Program]:
    """Look up a program in the notebook-level `progs` list by name.

    Parameters:
        progname: the name to compare against each program's get_name().

    Returns:
        The first program in `progs` whose name equals `progname`, or None
        when no program has that name.
    """
    for prog in progs:
        if progname == prog.get_name():
            return prog
    # Make the "not found" result explicit instead of falling off the end.
    return None

# Build configurations under test. All three use optimization=0 and differ
# only in the debug / strip flags.
debug_opts = BuildOptions(optimization=0, strip=False, debug=True)
standard_opts = BuildOptions(optimization=0, strip=False, debug=False)
strip_opts = BuildOptions(optimization=0, strip=True, debug=False)

# Iteration order used by the cells below: stripped, standard, debug.
opts_sets = (strip_opts, standard_opts, debug_opts)

# Parser callables, looked up by name
dwarf_parser = get_parser("dwarf")
ghidra_parser = get_parser("ghidra")

# Varnode comparison levels and their display labels (same order)
varnode_compare_levels = [ level for level in VarnodeCompareLevel.range() ]
varnode_compare_level_labels = [ VarnodeCompareLevel.to_string(lvl) for lvl in varnode_compare_levels ]

# Metatypes: primitives first, then the complex ones
primitive_metatypes = [MetaType.INT, MetaType.FLOAT, MetaType.POINTER]
complex_metatypes = [MetaType.ARRAY, MetaType.STRUCT, MetaType.UNION]
metatypes = [*primitive_metatypes, *complex_metatypes]
metatype_labels = [ MetaType.repr(meta) for meta in metatypes ]

# Sanity check: every program must report a valid build for each of the
# three build configurations before any parsing is attempted.
for prog in progs:
    for opts in (debug_opts, standard_opts, strip_opts):
        build_is_valid = prog.valid_build(opts)
        assert build_is_valid

In [49]:

# the filename format for saving parsed ProgramInfo pickle objects
def mangle_proginfo_save_name(parsername: str, prog: Program, opts: BuildOptions) -> str:
    """Build the pickle file name for a parsed ProgramInfo.

    The result has the form "<binary name>.<parsername>.pickle", where the
    binary name is prog.get_binary_name(opts).
    """
    binary_name = prog.get_binary_name(opts)
    return "{}.{}.pickle".format(binary_name, parsername)

def get_proginfo_save_path(parsername: str, prog: Program, opts: BuildOptions) -> Path:
    """Return the location under PICKLE_CACHE_DIR of the ProgramInfo pickle
    for the given parser name, program and build options."""
    filename = mangle_proginfo_save_name(parsername, prog, opts)
    return PICKLE_CACHE_DIR.joinpath(filename)

def save_proginfo(proginfo: ProgramInfo, parsername: str, prog: Program, opts: BuildOptions):
    """Hand `proginfo` to save_pickle together with its cache path
    (see get_proginfo_save_path for how the path is derived)."""
    destination = get_proginfo_save_path(parsername, prog, opts)
    save_pickle(proginfo, destination)

def load_proginfo(parsername: str, prog: Program, opts: BuildOptions) -> ProgramInfo:
    """Return whatever load_pickle yields for the cache path of the given
    parser name, program and build options."""
    source = get_proginfo_save_path(parsername, prog, opts)
    return load_pickle(source)

# the filename format for saving UnoptimizedProgramInfoCompare2 objects
def mangle_cmp_save_name(prog: Program, opts: BuildOptions) -> str:
    """Build the pickle file name for a comparison object.

    The result has the form "<binary name>.cmp.pickle", where the binary
    name is prog.get_binary_name(opts).
    """
    binary_name = prog.get_binary_name(opts)
    return "{}.cmp.pickle".format(binary_name)

def get_cmp_save_path(prog: Program, opts: BuildOptions) -> Path:
    """Return the location under PICKLE_CACHE_DIR of the comparison pickle
    for the given program and build options."""
    filename = mangle_cmp_save_name(prog, opts)
    return PICKLE_CACHE_DIR.joinpath(filename)

def save_cmp(cmp: UnoptimizedProgramInfoCompare2, prog: Program, opts: BuildOptions):
    """Hand the comparison object to save_pickle together with its cache path
    (see get_cmp_save_path for how the path is derived)."""
    destination = get_cmp_save_path(prog, opts)
    save_pickle(cmp, destination)

def load_cmp(prog: Program, opts: BuildOptions) -> UnoptimizedProgramInfoCompare2:
    """Return whatever load_pickle yields for the comparison cache path of
    the given program and build options."""
    source = get_cmp_save_path(prog, opts)
    return load_pickle(source)

In [50]:
# Parsing plan for this cell:
#   - DWARF:  parsed with the debug build options only
#   - Ghidra: parsed with every variation of the build options
# Results are cached in the local pickle_cache directory, with file names
# following the 'mangle' scheme.

skip_parsing = True # skip the parsing entirely? set to True if everything is already parsed & cached
reparse = False # re-parse a program even if a cached result already exists?

class ParseException(Exception):
    """Exception type reserved for parse failures (adds nothing to Exception)."""

def parse(parser: Callable, prog: Program, opts: BuildOptions) -> Optional[ProgramInfo]:
    """Run `parser` on the binary of `prog` built with `opts`.

    Parameters:
        parser: callable invoked with prog.get_binary_path(opts).
        prog: the program whose binary is parsed.
        opts: build options selecting which binary of `prog` to use.

    Returns:
        The parser's result, or None if the parser (or the path lookup)
        raised an ordinary exception. The caller records None as a failure.
    """
    try:
        return parser(prog.get_binary_path(opts))
    except Exception:
        # Best-effort: one failing program must not abort the whole batch.
        # Only Exception is caught, so KeyboardInterrupt / SystemExit still
        # propagate and a long parsing run stays interruptible.
        return None

# (parser name, program name, build options) for every parse that returned None
failed = []
if not skip_parsing:
    for prog in progs:
        # DWARF: parsed from the debug build only
        dwarf_debug_savepath = get_proginfo_save_path("dwarf", prog, debug_opts)
        if reparse or not dwarf_debug_savepath.exists():
            dwarf_debug = parse(dwarf_parser, prog, debug_opts)
            if dwarf_debug is None:
                failed.append(("dwarf", prog.get_name(), debug_opts))
            else:
                save_pickle(dwarf_debug, dwarf_debug_savepath)

        # Ghidra: parsed from every build variation
        for opts in opts_sets:
            ghidra_parse_savepath = get_proginfo_save_path("ghidra", prog, opts)
            # Decide on the Ghidra cache file itself. Checking the DWARF path
            # here would skip every Ghidra parse once the DWARF pickle exists.
            if reparse or not ghidra_parse_savepath.exists():
                ghidra_parse = parse(ghidra_parser, prog, opts)
                if ghidra_parse is None:
                    failed.append(("ghidra", prog.get_name(), opts))
                else:
                    save_pickle(ghidra_parse, ghidra_parse_savepath)

In [51]:
# Report parse failures, then verify that a Ghidra pickle is cached for
# every (program, build options) pair.
print(failed)

for prog in progs:
    for opts in opts_sets:
        ghidra_pickle_path = get_proginfo_save_path("ghidra", prog, opts)
        assert ghidra_pickle_path.exists()

[]


In [52]:
# For each program & build options combination, compute & store comparison object

recompare = False # recompute a comparison even if its pickle already exists?
skip_comparisons = True # skip this step entirely?

# (program name, build options) for every comparison that raised
failed = []
if not skip_comparisons:
    for prog in progs:
        # load the DWARF ground-truth ProgramInfo
        dwarf_proginfo = load_pickle(get_proginfo_save_path("dwarf", prog, debug_opts))
        assert(dwarf_proginfo is not None)

        # for each set of compilation options, load the Ghidra decompiler ProgramInfo
        # then compute & store the comparison object.
        # All of opts_sets is covered: the check that follows this cell asserts
        # that a comparison pickle exists for every entry of opts_sets.
        for opts in opts_sets:
            cmp_save_path = get_cmp_save_path(prog, opts)
            if recompare or not cmp_save_path.exists():
                ghidra_proginfo = load_pickle(get_proginfo_save_path("ghidra", prog, opts))
                assert(ghidra_proginfo is not None)
                try:
                    cmp = compare2(dwarf_proginfo, ghidra_proginfo)
                    save_pickle(cmp, cmp_save_path)
                except Exception:
                    # Record the failure and continue with the next combination.
                    # KeyboardInterrupt / SystemExit are deliberately not caught.
                    failed.append((prog.get_name(), opts))

In [53]:
# Report comparison failures, then verify that a comparison pickle is cached
# for every (program, build options) pair.
print(failed)

for prog in progs:
    for opts in opts_sets:
        cmp_pickle_path = get_cmp_save_path(prog, opts)
        assert cmp_pickle_path.exists()

[]


In [54]:
# For each opts, compute the tables

def mangle_table_save_name(tablename: str, opts: BuildOptions) -> str:
    """Build the CSV file name for a table: "<tablename><suffix(opts)>.csv".

    NOTE(review): `suffix` is not defined in this notebook; it presumably
    comes from one of the star imports -- confirm.
    """
    opts_suffix = suffix(opts)
    return "{}{}.csv".format(tablename, opts_suffix)

def build_options_display_suffix(opts: BuildOptions) -> str:
    """Render the build options as a parenthesised, human-readable suffix
    listing the optimization, strip and debug attributes of `opts`."""
    template = "(optimization={}, stripped={}, debug={})"
    return template.format(opts.optimization, opts.strip, opts.debug)

def mangle_table_display_name(tablename: str, opts: BuildOptions) -> str:
    """Return the table's display title: the table name, a space, and the
    build-options suffix from build_options_display_suffix."""
    opts_label = build_options_display_suffix(opts)
    return "{} {}".format(tablename, opts_label)

def get_table_save_path(tablename: str, opts: BuildOptions) -> Path:
    """Return the location under DATA_DIR of the CSV file for the given
    table name and build options."""
    filename = mangle_table_save_name(tablename, opts)
    return DATA_DIR.joinpath(filename)

def load_table(tablename: str, opts: BuildOptions) -> pd.DataFrame:
    """Read a saved table from its CSV file.

    The first CSV column is used as the DataFrame index.
    """
    csv_path = get_table_save_path(tablename, opts)
    return pd.read_csv(csv_path, index_col=0)

def load_table_filter_analyzed(tablename: str, opts: BuildOptions) -> pd.DataFrame:
    """Load a saved table and keep only the rows whose index label appears
    in `prognames_analyze`."""
    full_table = load_table(tablename, opts)
    return full_table.filter(prognames_analyze, axis=0)

In [55]:
metrics_groups = make_metrics()

# Individual groups are picked out of make_metrics()'s result by position.
# NOTE(review): the positions (0, 1, 2, 9, 13) are hard-coded here and must
# stay in sync with the order make_metrics() produces -- confirm on change.
bytes_group, functions_group, varnodes_group = (
    metrics_groups[position] for position in (0, 1, 2)
)
decomposed_varnodes_group, array_comparisons_group = (
    metrics_groups[position] for position in (9, 13)
)

def varnodes_group_metatype(metatype: int) -> MetricsGroup:
    """Return the varnodes metrics group for one metatype.

    The primitive metatypes followed by the complex ones are mapped, in
    order, to positions 3, 4, 5, ... of `metrics_groups`. A metatype outside
    those lists raises KeyError.
    """
    position_by_metatype = {
        meta: position
        for position, meta in enumerate(primitive_metatypes + complex_metatypes, 3)
    }
    return metrics_groups[position_by_metatype[metatype]]

# Per-metatype varnodes groups, primitives first and then the complex metatypes.
varnodes_groups_metatypes = [
    varnodes_group_metatype(meta)
    for meta in (primitive_metatypes + complex_metatypes)
]

def decomposed_varnodes_group_metatype(metatype: int) -> MetricsGroup:
    """Return the decomposed-varnodes metrics group for one primitive metatype.

    The primitive metatypes are mapped, in order, to positions 10, 11, 12
    of `metrics_groups`. Any other metatype raises KeyError.
    """
    position_by_metatype = {
        meta: position
        for position, meta in enumerate(primitive_metatypes, 10)
    }
    return metrics_groups[position_by_metatype[metatype]]

# Per-metatype decomposed groups exist for the primitive metatypes only.
decomposed_varnodes_groups_metatypes = [
    decomposed_varnodes_group_metatype(meta)
    for meta in primitive_metatypes
]

# Overall group first, followed by its per-metatype breakdowns.
high_varnodes_groups = [varnodes_group, *varnodes_groups_metatypes]
decomposed_varnodes_groups = [decomposed_varnodes_group, *decomposed_varnodes_groups_metatypes]

In [56]:
recompute = False # recompute a table even if its CSV already exists?
skip_compute_metrics = True # skip this step entirely?

if not skip_compute_metrics:
    for opts in opts_sets:
        # comparison objects for all programs, in the same order as `prognames`
        cmps = [ load_cmp(prog, opts) for prog in progs ]
        for grp in metrics_groups:
            save_path = get_table_save_path(grp.get_name(), opts)
            tablename = mangle_table_display_name(grp.get_display_name(), opts)
            print(tablename)
            # keep an existing CSV unless a recompute was requested
            if not recompute and save_path.exists():
                continue
            df = compute_comparisons_metrics_dataframe(prognames, cmps, grp.get_metrics())
            df.to_csv(save_path)


In [57]:
def get_table(
    grp: MetricsGroup,
    opts: BuildOptions,
    analyzed_only: bool = True
)-> pd.DataFrame:
    """Load the saved table of a metrics group for the given build options.

    Parameters:
        grp: metrics group; its get_name() selects the table.
        opts: build options the table was computed for.
        analyzed_only: when truthy, keep only rows whose index label is in
            `prognames_analyze`; otherwise return the full table.
    """
    table = load_table(grp.get_name(), opts)
    if analyzed_only:
        return table.filter(prognames_analyze, axis=0)
    return table

def display_analyzed_tables(
    metrics_groups: List[MetricsGroup],
    opts_sets: List[BuildOptions],
    analyzed_only: bool = True
):
    """Print a banner and display the saved table for every combination of
    metrics group and build options.

    Parameters:
        metrics_groups: groups whose tables are shown (outer loop).
        opts_sets: build options to show each group for (inner loop).
        analyzed_only: forwarded to get_table.
    """
    rule = "-"*10
    for grp in metrics_groups:
        for opts in opts_sets:
            title = mangle_table_display_name(grp.get_display_name(), opts)
            table = get_table(grp, opts, analyzed_only=analyzed_only)

            print("{} {} {}".format(rule, title, rule))
            display(table)

In [58]:
skip_fix_varnode_metrics = False

# Add two derived columns to every varnodes table and write the table back
# to its CSV file:
#   "Varnodes fraction partially recovered" = (columns 2..5 summed) / column 0
#   "Varnodes fraction exactly recovered"   = column 5 / column 0
# Columns are addressed by position; in the saved tables column 0 is the
# ground-truth count and columns 1..5 are the per-level match counts.
if not skip_fix_varnode_metrics:
    for grp in high_varnodes_groups + decomposed_varnodes_groups:
        for opts in opts_sets:
            df = get_table(grp, opts, analyzed_only=False)
            ground_truth_counts = df.iloc[:,0]
            partially_recovered_counts = df.iloc[:,2:6].sum(axis=1)
            df["Varnodes fraction partially recovered"] = partially_recovered_counts / ground_truth_counts
            df["Varnodes fraction exactly recovered"] = df.iloc[:,5] / ground_truth_counts
            savepath = get_table_save_path(grp.get_name(), opts)
            df.to_csv(savepath)

def get_varnode_group_average_stats(
    grp: MetricsGroup,
    analyzed_only: bool = True,
    opts: Optional[BuildOptions] = None
) -> pd.Series:
    """Average the summary columns (position 6 onward) of a varnodes table.

    Parameters:
        grp: metrics group whose saved table is loaded.
        analyzed_only: forwarded to get_table. For backward compatibility a
            BuildOptions passed in this position is treated as `opts`, with
            analyzed_only falling back to True.
        opts: build options selecting which table to load. When omitted, the
            notebook-level `opts` variable is used, as before.

    Returns:
        A Series with the column-wise mean of every column from position 6 on.

    Raises:
        ValueError: if no build options were passed and no notebook-level
            `opts` exists.
    """
    if isinstance(analyzed_only, BuildOptions):
        # Legacy call form get_varnode_group_average_stats(grp, opts): the
        # build options arrive in the analyzed_only slot.
        opts, analyzed_only = analyzed_only, True
    if opts is None:
        # Previous behaviour: rely on whatever `opts` a loop left behind in
        # the notebook namespace. Kept only as a fallback.
        opts = globals().get("opts")
        if opts is None:
            raise ValueError("build options must be passed via 'opts'")
    df = get_table(grp, opts, analyzed_only=analyzed_only)
    return df.iloc[:,6:].mean(axis=0)

In [59]:
skip_generate_metatype_level_summaries = True

if not skip_generate_metatype_level_summaries:
    # One entry per summary table:
    #   (table name stem, metatypes to summarise, per-metatype group lookup)
    # The high-level summary skips the last entry of `metatypes`; the
    # decomposed summary covers the primitive metatypes only.
    summary_specs = (
        ("metatype_match_levels_ratios", metatypes[:-1], varnodes_group_metatype),
        ("metatype_match_levels_ratios_decomposed", primitive_metatypes, decomposed_varnodes_group_metatype),
    )

    for opts in opts_sets:
        for analyzed_only in (True, False):
            _suffix = "_analyzed_only" if analyzed_only else ""

            for tablename_stem, summary_metatypes, group_for_metatype in summary_specs:
                # one row of per-level ratios per metatype
                ratio_rows = []
                for metatype in summary_metatypes:
                    grp = group_for_metatype(metatype)
                    df = get_table(grp, opts, analyzed_only=analyzed_only)
                    # column 0: ground-truth count; columns 1..5: per-level counts
                    metatype_varnodes = df.iloc[:,0].sum()
                    varnodes_by_levels = df.iloc[:,1:6].sum(axis=0)
                    varnodes_by_levels.index = varnode_compare_level_labels
                    ratio_rows.append(varnodes_by_levels / metatype_varnodes)

                summary_df = pd.DataFrame(
                    ratio_rows,
                    index=[ MetaType.repr(metatype) for metatype in summary_metatypes ],
                    columns=varnode_compare_level_labels
                )
                summary_tablename = tablename_stem + _suffix
                summary_savepath = get_table_save_path(summary_tablename, opts)
                print("{}{}".format(summary_tablename, suffix(opts)))
                display(summary_df)
                summary_df.to_csv(summary_savepath)

In [60]:
# Preview the saved per-metatype match-level ratios (analyzed programs only)
# for the debug build.
df = load_table(
    "metatype_match_levels_ratios_analyzed_only",
    debug_opts
)
df

Unnamed: 0,NO_MATCH,OVERLAP,SUBSET,ALIGNED,MATCH
INT,0.007421,0.00053,0.0,0.00318,0.988868
FLOAT,0.0,0.0,0.0,0.0,1.0
POINTER,0.009042,0.0,0.0,0.0,0.990958
ARRAY,0.142857,0.002421,0.009685,0.0,0.845036
STRUCT,0.0,0.002959,0.0,0.0,0.997041


In [61]:
# Function Analysis: show the functions table of the analyzed programs for
# each build configuration.

for opts in opts_sets:
    functions_title = mangle_table_display_name(functions_group.get_name(), opts)
    print(functions_title)
    functions_table = get_table(functions_group, opts, analyzed_only=True)
    display(functions_table)

functions (optimization=0, stripped=True, debug=False)


Unnamed: 0,Ground truth functions,Functions found,Functions missed,Functions recovery fraction
stat,240,240,0,1.0
nohup,115,115,0,1.0
pinky,124,124,0,1.0
csplit,339,339,0,1.0
fmt,131,131,0,1.0
df,266,266,0,1.0
join,152,152,0,1.0
expr,323,323,0,1.0
seq,129,129,0,1.0
unexpand,121,121,0,1.0


functions (optimization=0, stripped=False, debug=False)


Unnamed: 0,Ground truth functions,Functions found,Functions missed,Functions recovery fraction
stat,240,240,0,1.0
nohup,115,115,0,1.0
pinky,124,124,0,1.0
csplit,339,339,0,1.0
fmt,131,131,0,1.0
df,266,266,0,1.0
join,152,152,0,1.0
expr,323,323,0,1.0
seq,129,129,0,1.0
unexpand,121,121,0,1.0


functions (optimization=0, stripped=False, debug=True)


Unnamed: 0,Ground truth functions,Functions found,Functions missed,Functions recovery fraction
stat,240,240,0,1.0
nohup,115,115,0,1.0
pinky,124,124,0,1.0
csplit,339,339,0,1.0
fmt,131,131,0,1.0
df,266,266,0,1.0
join,152,152,0,1.0
expr,323,323,0,1.0
seq,129,129,0,1.0
unexpand,121,121,0,1.0


In [67]:
# Varnode Analysis: for each build configuration, show every high-level
# varnodes table with its column averages, then the per-metatype ratio summary.

for opts in opts_sets:
    banner_rule = "-"*10
    print("{} HIGH-LEVEL VARNODES ANALYSIS {} {}".format(banner_rule, build_options_display_suffix(opts), banner_rule))

    for grp in high_varnodes_groups:
        group_title = mangle_table_display_name(grp.get_name(), opts)
        print(group_title)
        display(get_table(grp, opts))
        # NOTE(review): the helper's second positional parameter is named
        # `analyzed_only`; `opts` is passed in that slot here.
        display(get_varnode_group_average_stats(grp, opts))

    display(load_table("metatype_match_levels_ratios_analyzed_only", opts))

---------- HIGH-LEVEL VARNODES ANALYSIS (optimization=0, stripped=True, debug=False) ----------
varnodes (optimization=0, stripped=True, debug=False)


Unnamed: 0,Ground truth varnodes,Varnodes matched @ level=NO_MATCH,Varnodes matched @ level=OVERLAP,Varnodes matched @ level=SUBSET,Varnodes matched @ level=ALIGNED,Varnodes matched @ level=MATCH,"Varnode average comparison score [0,1]",Varnodes fraction partially recovered,Varnodes fraction exactly recovered
stat,608,20,22,20,313,233,0.794819,0.967105,0.383224
nohup,162,5,8,6,102,41,0.756173,0.969136,0.253086
pinky,182,8,12,5,106,51,0.747253,0.956044,0.28022
csplit,1003,15,43,17,540,388,0.809821,0.985045,0.386839
fmt,186,6,8,4,107,61,0.780914,0.967742,0.327957
df,646,9,25,15,283,314,0.835913,0.986068,0.486068
join,260,9,12,5,145,89,0.781731,0.965385,0.342308
expr,932,15,38,27,496,356,0.805794,0.983906,0.381974
seq,279,8,21,8,121,121,0.792115,0.971326,0.433692
unexpand,158,5,7,5,89,52,0.778481,0.968354,0.329114


Varnode average comparison score [0,1]    0.778015
Varnodes fraction partially recovered     0.961087
Varnodes fraction exactly recovered       0.351030
dtype: float64

varnodes_metatype_INT (optimization=0, stripped=True, debug=False)


Unnamed: 0,Ground truth varnodes (metatype=INT),Decompiler varnodes matched @ level=NO_MATCH (metatype=INT),Decompiler varnodes matched @ level=OVERLAP (metatype=INT),Decompiler varnodes matched @ level=SUBSET (metatype=INT),Decompiler varnodes matched @ level=ALIGNED (metatype=INT),Decompiler varnodes matched @ level=MATCH (metatype=INT),"Varnode average compare score [0,1] (metatype=INT)",Varnodes fraction partially recovered,Varnodes fraction exactly recovered
stat,392,4,1,0,233,154,0.839286,0.989796,0.392857
nohup,99,0,0,0,71,28,0.820707,1.0,0.282828
pinky,96,0,0,0,65,31,0.830729,1.0,0.322917
csplit,632,0,1,0,331,300,0.86788,1.0,0.474684
fmt,112,0,0,0,72,40,0.839286,1.0,0.357143
df,327,0,0,0,158,169,0.879205,1.0,0.51682
join,162,0,0,0,104,58,0.839506,1.0,0.358025
expr,573,0,1,0,292,280,0.871291,1.0,0.488656
seq,156,0,0,0,78,78,0.875,1.0,0.5
unexpand,101,0,0,0,61,40,0.84901,1.0,0.39604


Varnode average compare score [0,1] (metatype=INT)    0.846835
Varnodes fraction partially recovered                 0.995729
Varnodes fraction exactly recovered                   0.400888
dtype: float64

varnodes_metatype_FLOAT (optimization=0, stripped=True, debug=False)


Unnamed: 0,Ground truth varnodes (metatype=FLOAT),Decompiler varnodes matched @ level=NO_MATCH (metatype=FLOAT),Decompiler varnodes matched @ level=OVERLAP (metatype=FLOAT),Decompiler varnodes matched @ level=SUBSET (metatype=FLOAT),Decompiler varnodes matched @ level=ALIGNED (metatype=FLOAT),Decompiler varnodes matched @ level=MATCH (metatype=FLOAT),"Varnode average compare score [0,1] (metatype=FLOAT)",Varnodes fraction partially recovered,Varnodes fraction exactly recovered
stat,2,0,1,0,1,0,0.5,1.0,0.0
nohup,0,0,0,0,0,0,,,
pinky,0,0,0,0,0,0,,,
csplit,0,0,0,0,0,0,,,
fmt,0,0,0,0,0,0,,,
df,13,0,3,0,5,5,0.730769,1.0,0.384615
join,0,0,0,0,0,0,,,
expr,0,0,0,0,0,0,,,
seq,10,0,9,0,1,0,0.3,1.0,0.0
unexpand,0,0,0,0,0,0,,,


Varnode average compare score [0,1] (metatype=FLOAT)    0.519017
Varnodes fraction partially recovered                   1.000000
Varnodes fraction exactly recovered                     0.064103
dtype: float64

varnodes_metatype_POINTER (optimization=0, stripped=True, debug=False)


Unnamed: 0,Ground truth varnodes (metatype=POINTER),Decompiler varnodes matched @ level=NO_MATCH (metatype=POINTER),Decompiler varnodes matched @ level=OVERLAP (metatype=POINTER),Decompiler varnodes matched @ level=SUBSET (metatype=POINTER),Decompiler varnodes matched @ level=ALIGNED (metatype=POINTER),Decompiler varnodes matched @ level=MATCH (metatype=POINTER),"Varnode average compare score [0,1] (metatype=POINTER)",Varnodes fraction partially recovered,Varnodes fraction exactly recovered
stat,150,1,0,0,73,76,0.871667,0.993333,0.506667
nohup,40,0,0,0,28,12,0.825,1.0,0.3
pinky,57,0,0,0,38,19,0.833333,1.0,0.333333
csplit,280,6,0,0,203,71,0.797321,0.978571,0.253571
fmt,52,0,0,0,32,20,0.846154,1.0,0.384615
df,244,0,0,0,110,134,0.887295,1.0,0.54918
join,66,0,0,0,38,28,0.856061,1.0,0.424242
expr,267,6,0,0,198,63,0.792135,0.977528,0.235955
seq,81,0,1,0,39,41,0.87037,1.0,0.506173
unexpand,36,0,0,0,25,11,0.826389,1.0,0.305556


Varnode average compare score [0,1] (metatype=POINTER)    0.835368
Varnodes fraction partially recovered                     0.988616
Varnodes fraction exactly recovered                       0.377169
dtype: float64

varnodes_metatype_ARRAY (optimization=0, stripped=True, debug=False)


Unnamed: 0,Ground truth varnodes (metatype=ARRAY),Decompiler varnodes matched @ level=NO_MATCH (metatype=ARRAY),Decompiler varnodes matched @ level=OVERLAP (metatype=ARRAY),Decompiler varnodes matched @ level=SUBSET (metatype=ARRAY),Decompiler varnodes matched @ level=ALIGNED (metatype=ARRAY),Decompiler varnodes matched @ level=MATCH (metatype=ARRAY),"Varnode average compare score [0,1] (metatype=ARRAY)",Varnodes fraction partially recovered,Varnodes fraction exactly recovered
stat,32,14,7,9,0,2,0.257812,0.5625,0.0625
nohup,13,4,3,5,0,1,0.326923,0.692308,0.076923
pinky,17,6,7,3,0,1,0.25,0.647059,0.058824
csplit,35,8,13,6,0,8,0.407143,0.771429,0.228571
fmt,12,5,3,3,0,1,0.270833,0.583333,0.083333
df,25,7,9,6,1,2,0.32,0.72,0.08
join,18,7,5,3,0,3,0.319444,0.611111,0.166667
expr,33,8,9,8,0,8,0.431818,0.757576,0.242424
seq,15,7,2,4,0,2,0.3,0.533333,0.133333
unexpand,11,4,2,4,0,1,0.318182,0.636364,0.090909


Varnode average compare score [0,1] (metatype=ARRAY)    0.302458
Varnodes fraction partially recovered                   0.618656
Varnodes fraction exactly recovered                     0.107877
dtype: float64

varnodes_metatype_STRUCT (optimization=0, stripped=True, debug=False)


Unnamed: 0,Ground truth varnodes (metatype=STRUCT),Decompiler varnodes matched @ level=NO_MATCH (metatype=STRUCT),Decompiler varnodes matched @ level=OVERLAP (metatype=STRUCT),Decompiler varnodes matched @ level=SUBSET (metatype=STRUCT),Decompiler varnodes matched @ level=ALIGNED (metatype=STRUCT),Decompiler varnodes matched @ level=MATCH (metatype=STRUCT),"Varnode average compare score [0,1] (metatype=STRUCT)",Varnodes fraction partially recovered,Varnodes fraction exactly recovered
stat,32,1,13,11,6,1,0.445312,0.96875,0.03125
nohup,10,1,5,1,3,0,0.4,0.9,0.0
pinky,12,2,5,2,3,0,0.375,0.833333,0.0
csplit,56,1,29,11,6,9,0.46875,0.982143,0.160714
fmt,10,1,5,1,3,0,0.4,0.9,0.0
df,37,2,13,9,9,4,0.5,0.945946,0.108108
join,14,2,7,2,3,0,0.357143,0.857143,0.0
expr,59,1,28,19,6,5,0.440678,0.983051,0.084746
seq,17,1,9,4,3,0,0.382353,0.941176,0.0
unexpand,10,1,5,1,3,0,0.4,0.9,0.0


Varnode average compare score [0,1] (metatype=STRUCT)    0.421284
Varnodes fraction partially recovered                    0.920584
Varnodes fraction exactly recovered                      0.043582
dtype: float64

varnodes_metatype_UNION (optimization=0, stripped=True, debug=False)


Unnamed: 0,Ground truth varnodes (metatype=UNION),Decompiler varnodes matched @ level=NO_MATCH (metatype=UNION),Decompiler varnodes matched @ level=OVERLAP (metatype=UNION),Decompiler varnodes matched @ level=SUBSET (metatype=UNION),Decompiler varnodes matched @ level=ALIGNED (metatype=UNION),Decompiler varnodes matched @ level=MATCH (metatype=UNION),"Varnode average compare score [0,1] (metatype=UNION)",Varnodes fraction partially recovered,Varnodes fraction exactly recovered
stat,0,0,0,0,0,0,,,
nohup,0,0,0,0,0,0,,,
pinky,0,0,0,0,0,0,,,
csplit,0,0,0,0,0,0,,,
fmt,0,0,0,0,0,0,,,
df,0,0,0,0,0,0,,,
join,0,0,0,0,0,0,,,
expr,0,0,0,0,0,0,,,
seq,0,0,0,0,0,0,,,
unexpand,0,0,0,0,0,0,,,


Varnode average compare score [0,1] (metatype=UNION)   NaN
Varnodes fraction partially recovered                  NaN
Varnodes fraction exactly recovered                    NaN
dtype: float64

Unnamed: 0,NO_MATCH,OVERLAP,SUBSET,ALIGNED,MATCH
INT,0.009011,0.000795,0.0,0.570103,0.42009
FLOAT,0.0,0.485714,0.0,0.371429,0.142857
POINTER,0.016878,0.000603,0.0,0.608198,0.374322
ARRAY,0.450363,0.213075,0.225182,0.002421,0.108959
STRUCT,0.056213,0.473373,0.204142,0.195266,0.071006


---------- HIGH-LEVEL VARNODES ANALYSIS (optimization=0, stripped=False, debug=False) ----------
varnodes (optimization=0, stripped=False, debug=False)


Unnamed: 0,Ground truth varnodes,Varnodes matched @ level=NO_MATCH,Varnodes matched @ level=OVERLAP,Varnodes matched @ level=SUBSET,Varnodes matched @ level=ALIGNED,Varnodes matched @ level=MATCH,"Varnode average comparison score [0,1]",Varnodes fraction partially recovered,Varnodes fraction exactly recovered
stat,608,5,21,14,321,247,0.822368,0.991776,0.40625
nohup,162,1,7,4,105,45,0.787037,0.993827,0.277778
pinky,182,1,11,2,110,58,0.792582,0.994505,0.318681
csplit,1003,8,41,15,544,395,0.818295,0.992024,0.393819
fmt,186,1,6,2,110,67,0.817204,0.994624,0.360215
df,646,1,20,11,287,327,0.85565,0.998452,0.506192
join,260,1,8,3,151,97,0.822115,0.996154,0.373077
expr,932,8,37,25,499,363,0.814378,0.991416,0.389485
seq,279,1,20,6,128,124,0.817204,0.996416,0.444444
unexpand,158,1,6,2,92,57,0.813291,0.993671,0.360759


Varnode average comparison score [0,1]    0.808120
Varnodes fraction partially recovered     0.985349
Varnodes fraction exactly recovered       0.376531
dtype: float64

varnodes_metatype_INT (optimization=0, stripped=False, debug=False)


Unnamed: 0,Ground truth varnodes (metatype=INT),Decompiler varnodes matched @ level=NO_MATCH (metatype=INT),Decompiler varnodes matched @ level=OVERLAP (metatype=INT),Decompiler varnodes matched @ level=SUBSET (metatype=INT),Decompiler varnodes matched @ level=ALIGNED (metatype=INT),Decompiler varnodes matched @ level=MATCH (metatype=INT),"Varnode average compare score [0,1] (metatype=INT)",Varnodes fraction partially recovered,Varnodes fraction exactly recovered
stat,392,0,1,0,237,154,0.846939,1.0,0.392857
nohup,99,0,0,0,71,28,0.820707,1.0,0.282828
pinky,96,0,0,0,65,31,0.830729,1.0,0.322917
csplit,632,0,1,0,331,300,0.86788,1.0,0.474684
fmt,112,0,0,0,72,40,0.839286,1.0,0.357143
df,327,0,0,0,158,169,0.879205,1.0,0.51682
join,162,0,0,0,104,58,0.839506,1.0,0.358025
expr,573,0,1,0,292,280,0.871291,1.0,0.488656
seq,156,0,0,0,81,75,0.870192,1.0,0.480769
unexpand,101,0,0,0,61,40,0.84901,1.0,0.39604


Varnode average compare score [0,1] (metatype=INT)    0.847012
Varnodes fraction partially recovered                 0.996366
Varnodes fraction exactly recovered                   0.399686
dtype: float64

varnodes_metatype_FLOAT (optimization=0, stripped=False, debug=False)


Unnamed: 0,Ground truth varnodes (metatype=FLOAT),Decompiler varnodes matched @ level=NO_MATCH (metatype=FLOAT),Decompiler varnodes matched @ level=OVERLAP (metatype=FLOAT),Decompiler varnodes matched @ level=SUBSET (metatype=FLOAT),Decompiler varnodes matched @ level=ALIGNED (metatype=FLOAT),Decompiler varnodes matched @ level=MATCH (metatype=FLOAT),"Varnode average compare score [0,1] (metatype=FLOAT)",Varnodes fraction partially recovered,Varnodes fraction exactly recovered
stat,2,0,1,0,1,0,0.5,1.0,0.0
nohup,0,0,0,0,0,0,,,
pinky,0,0,0,0,0,0,,,
csplit,0,0,0,0,0,0,,,
fmt,0,0,0,0,0,0,,,
df,13,0,3,0,5,5,0.730769,1.0,0.384615
join,0,0,0,0,0,0,,,
expr,0,0,0,0,0,0,,,
seq,10,0,9,0,1,0,0.3,1.0,0.0
unexpand,0,0,0,0,0,0,,,


Varnode average compare score [0,1] (metatype=FLOAT)    0.519017
Varnodes fraction partially recovered                   1.000000
Varnodes fraction exactly recovered                     0.064103
dtype: float64

varnodes_metatype_POINTER (optimization=0, stripped=False, debug=False)


Unnamed: 0,Ground truth varnodes (metatype=POINTER),Decompiler varnodes matched @ level=NO_MATCH (metatype=POINTER),Decompiler varnodes matched @ level=OVERLAP (metatype=POINTER),Decompiler varnodes matched @ level=SUBSET (metatype=POINTER),Decompiler varnodes matched @ level=ALIGNED (metatype=POINTER),Decompiler varnodes matched @ level=MATCH (metatype=POINTER),"Varnode average compare score [0,1] (metatype=POINTER)",Varnodes fraction partially recovered,Varnodes fraction exactly recovered
stat,150,0,0,0,74,76,0.876667,1.0,0.506667
nohup,40,0,0,0,28,12,0.825,1.0,0.3
pinky,57,0,0,0,38,19,0.833333,1.0,0.333333
csplit,280,6,0,0,203,71,0.797321,0.978571,0.253571
fmt,52,0,0,0,32,20,0.846154,1.0,0.384615
df,244,0,0,0,110,134,0.887295,1.0,0.54918
join,66,0,0,0,38,28,0.856061,1.0,0.424242
expr,267,6,0,0,198,63,0.792135,0.977528,0.235955
seq,81,0,1,0,39,41,0.87037,1.0,0.506173
unexpand,36,0,0,0,25,11,0.826389,1.0,0.305556


Varnode average compare score [0,1] (metatype=POINTER)    0.835681
Varnodes fraction partially recovered                     0.989033
Varnodes fraction exactly recovered                       0.377169
dtype: float64

varnodes_metatype_ARRAY (optimization=0, stripped=False, debug=False)


Unnamed: 0,Ground truth varnodes (metatype=ARRAY),Decompiler varnodes matched @ level=NO_MATCH (metatype=ARRAY),Decompiler varnodes matched @ level=OVERLAP (metatype=ARRAY),Decompiler varnodes matched @ level=SUBSET (metatype=ARRAY),Decompiler varnodes matched @ level=ALIGNED (metatype=ARRAY),Decompiler varnodes matched @ level=MATCH (metatype=ARRAY),"Varnode average compare score [0,1] (metatype=ARRAY)",Varnodes fraction partially recovered,Varnodes fraction exactly recovered
stat,32,5,7,6,0,14,0.585938,0.84375,0.4375
nohup,13,1,3,4,0,5,0.596154,0.923077,0.384615
pinky,17,1,7,2,0,7,0.573529,0.941176,0.411765
csplit,35,2,13,5,0,15,0.592857,0.942857,0.428571
fmt,12,1,2,2,0,7,0.708333,0.916667,0.583333
df,25,1,9,3,2,10,0.61,0.96,0.4
join,18,1,3,2,1,11,0.75,0.944444,0.611111
expr,33,2,9,7,0,15,0.628788,0.939394,0.454545
seq,15,1,2,3,1,8,0.716667,0.933333,0.533333
unexpand,11,1,2,2,0,6,0.681818,0.909091,0.545455


Varnode average compare score [0,1] (metatype=ARRAY)    0.625736
Varnodes fraction partially recovered                   0.894293
Varnodes fraction exactly recovered                     0.461824
dtype: float64

varnodes_metatype_STRUCT (optimization=0, stripped=False, debug=False)


Unnamed: 0,Ground truth varnodes (metatype=STRUCT),Decompiler varnodes matched @ level=NO_MATCH (metatype=STRUCT),Decompiler varnodes matched @ level=OVERLAP (metatype=STRUCT),Decompiler varnodes matched @ level=SUBSET (metatype=STRUCT),Decompiler varnodes matched @ level=ALIGNED (metatype=STRUCT),Decompiler varnodes matched @ level=MATCH (metatype=STRUCT),"Varnode average compare score [0,1] (metatype=STRUCT)",Varnodes fraction partially recovered,Varnodes fraction exactly recovered
stat,32,0,12,8,9,3,0.523438,1.0,0.09375
nohup,10,0,4,0,6,0,0.55,1.0,0.0
pinky,12,0,4,0,7,1,0.604167,1.0,0.083333
csplit,56,0,27,10,10,9,0.504464,1.0,0.160714
fmt,10,0,4,0,6,0,0.55,1.0,0.0
df,37,0,8,8,12,9,0.648649,1.0,0.243243
join,14,0,5,1,8,0,0.553571,1.0,0.0
expr,59,0,27,18,9,5,0.466102,1.0,0.084746
seq,17,0,8,3,6,0,0.470588,1.0,0.0
unexpand,10,0,4,0,6,0,0.55,1.0,0.0


Varnode average compare score [0,1] (metatype=STRUCT)    0.546677
Varnodes fraction partially recovered                    1.000000
Varnodes fraction exactly recovered                      0.065049
dtype: float64

varnodes_metatype_UNION (optimization=0, stripped=False, debug=False)


Unnamed: 0,Ground truth varnodes (metatype=UNION),Decompiler varnodes matched @ level=NO_MATCH (metatype=UNION),Decompiler varnodes matched @ level=OVERLAP (metatype=UNION),Decompiler varnodes matched @ level=SUBSET (metatype=UNION),Decompiler varnodes matched @ level=ALIGNED (metatype=UNION),Decompiler varnodes matched @ level=MATCH (metatype=UNION),"Varnode average compare score [0,1] (metatype=UNION)",Varnodes fraction partially recovered,Varnodes fraction exactly recovered
stat,0,0,0,0,0,0,,,
nohup,0,0,0,0,0,0,,,
pinky,0,0,0,0,0,0,,,
csplit,0,0,0,0,0,0,,,
fmt,0,0,0,0,0,0,,,
df,0,0,0,0,0,0,,,
join,0,0,0,0,0,0,,,
expr,0,0,0,0,0,0,,,
seq,0,0,0,0,0,0,,,
unexpand,0,0,0,0,0,0,,,


Varnode average compare score [0,1] (metatype=UNION)   NaN
Varnodes fraction partially recovered                  NaN
Varnodes fraction exactly recovered                    NaN
dtype: float64

Unnamed: 0,NO_MATCH,OVERLAP,SUBSET,ALIGNED,MATCH
INT,0.007951,0.000795,0.0,0.571959,0.419295
FLOAT,0.0,0.485714,0.0,0.371429,0.142857
POINTER,0.016275,0.000603,0.0,0.6088,0.374322
ARRAY,0.22276,0.20339,0.152542,0.01937,0.401937
STRUCT,0.0,0.408284,0.14497,0.349112,0.097633


---------- HIGH-LEVEL VARNODES ANALYSIS (optimization=0, stripped=False, debug=True) ----------
varnodes (optimization=0, stripped=False, debug=True)


Unnamed: 0,Ground truth varnodes,Varnodes matched @ level=NO_MATCH,Varnodes matched @ level=OVERLAP,Varnodes matched @ level=SUBSET,Varnodes matched @ level=ALIGNED,Varnodes matched @ level=MATCH,"Varnode average comparison score [0,1]",Varnodes fraction partially recovered,Varnodes fraction exactly recovered
stat,608,0,0,1,0,607,0.999178,1.0,0.998355
nohup,162,0,0,0,0,162,1.0,1.0,1.0
pinky,182,0,0,1,0,181,0.997253,1.0,0.994505
csplit,1003,0,1,1,6,995,0.997258,1.0,0.992024
fmt,186,0,0,0,0,186,1.0,1.0,1.0
df,646,0,2,0,0,644,0.997678,1.0,0.996904
join,260,0,0,0,0,260,1.0,1.0,1.0
expr,932,0,1,1,6,924,0.997049,1.0,0.991416
seq,279,0,0,0,0,279,1.0,1.0,1.0
unexpand,158,0,0,0,0,158,1.0,1.0,1.0


Varnode average comparison score [0,1]    0.991103
Varnodes fraction partially recovered     0.991827
Varnodes fraction exactly recovered       0.990152
dtype: float64

varnodes_metatype_INT (optimization=0, stripped=False, debug=True)


Unnamed: 0,Ground truth varnodes (metatype=INT),Decompiler varnodes matched @ level=NO_MATCH (metatype=INT),Decompiler varnodes matched @ level=OVERLAP (metatype=INT),Decompiler varnodes matched @ level=SUBSET (metatype=INT),Decompiler varnodes matched @ level=ALIGNED (metatype=INT),Decompiler varnodes matched @ level=MATCH (metatype=INT),"Varnode average compare score [0,1] (metatype=INT)",Varnodes fraction partially recovered,Varnodes fraction exactly recovered
stat,392,0,0,0,0,392,1.0,1.0,1.0
nohup,99,0,0,0,0,99,1.0,1.0,1.0
pinky,96,0,0,0,0,96,1.0,1.0,1.0
csplit,632,0,1,0,6,625,0.99644,1.0,0.988924
fmt,112,0,0,0,0,112,1.0,1.0,1.0
df,327,0,0,0,0,327,1.0,1.0,1.0
join,162,0,0,0,0,162,1.0,1.0,1.0
expr,573,0,1,0,6,566,0.996073,1.0,0.987784
seq,156,0,0,0,0,156,1.0,1.0,1.0
unexpand,101,0,0,0,0,101,1.0,1.0,1.0


Varnode average compare score [0,1] (metatype=INT)    0.996141
Varnodes fraction partially recovered                 0.996609
Varnodes fraction exactly recovered                   0.995153
dtype: float64

varnodes_metatype_FLOAT (optimization=0, stripped=False, debug=True)


Unnamed: 0,Ground truth varnodes (metatype=FLOAT),Decompiler varnodes matched @ level=NO_MATCH (metatype=FLOAT),Decompiler varnodes matched @ level=OVERLAP (metatype=FLOAT),Decompiler varnodes matched @ level=SUBSET (metatype=FLOAT),Decompiler varnodes matched @ level=ALIGNED (metatype=FLOAT),Decompiler varnodes matched @ level=MATCH (metatype=FLOAT),"Varnode average compare score [0,1] (metatype=FLOAT)",Varnodes fraction partially recovered,Varnodes fraction exactly recovered
stat,2,0,0,0,0,2,1.0,1.0,1.0
nohup,0,0,0,0,0,0,,,
pinky,0,0,0,0,0,0,,,
csplit,0,0,0,0,0,0,,,
fmt,0,0,0,0,0,0,,,
df,13,0,0,0,0,13,1.0,1.0,1.0
join,0,0,0,0,0,0,,,
expr,0,0,0,0,0,0,,,
seq,10,0,0,0,0,10,1.0,1.0,1.0
unexpand,0,0,0,0,0,0,,,


Varnode average compare score [0,1] (metatype=FLOAT)    1.0
Varnodes fraction partially recovered                   1.0
Varnodes fraction exactly recovered                     1.0
dtype: float64

varnodes_metatype_POINTER (optimization=0, stripped=False, debug=True)


Unnamed: 0,Ground truth varnodes (metatype=POINTER),Decompiler varnodes matched @ level=NO_MATCH (metatype=POINTER),Decompiler varnodes matched @ level=OVERLAP (metatype=POINTER),Decompiler varnodes matched @ level=SUBSET (metatype=POINTER),Decompiler varnodes matched @ level=ALIGNED (metatype=POINTER),Decompiler varnodes matched @ level=MATCH (metatype=POINTER),"Varnode average compare score [0,1] (metatype=POINTER)",Varnodes fraction partially recovered,Varnodes fraction exactly recovered
stat,150,0,0,0,0,150,1.0,1.0,1.0
nohup,40,0,0,0,0,40,1.0,1.0,1.0
pinky,57,0,0,0,0,57,1.0,1.0,1.0
csplit,280,0,0,0,0,280,1.0,1.0,1.0
fmt,52,0,0,0,0,52,1.0,1.0,1.0
df,244,0,0,0,0,244,1.0,1.0,1.0
join,66,0,0,0,0,66,1.0,1.0,1.0
expr,267,0,0,0,0,267,1.0,1.0,1.0
seq,81,0,0,0,0,81,1.0,1.0,1.0
unexpand,36,0,0,0,0,36,1.0,1.0,1.0


Varnode average compare score [0,1] (metatype=POINTER)    0.991776
Varnodes fraction partially recovered                     0.991776
Varnodes fraction exactly recovered                       0.991776
dtype: float64

varnodes_metatype_ARRAY (optimization=0, stripped=False, debug=True)


Unnamed: 0,Ground truth varnodes (metatype=ARRAY),Decompiler varnodes matched @ level=NO_MATCH (metatype=ARRAY),Decompiler varnodes matched @ level=OVERLAP (metatype=ARRAY),Decompiler varnodes matched @ level=SUBSET (metatype=ARRAY),Decompiler varnodes matched @ level=ALIGNED (metatype=ARRAY),Decompiler varnodes matched @ level=MATCH (metatype=ARRAY),"Varnode average compare score [0,1] (metatype=ARRAY)",Varnodes fraction partially recovered,Varnodes fraction exactly recovered
stat,32,0,0,1,0,31,0.984375,1.0,0.96875
nohup,13,0,0,0,0,13,1.0,1.0,1.0
pinky,17,0,0,1,0,16,0.970588,1.0,0.941176
csplit,35,0,0,1,0,34,0.985714,1.0,0.971429
fmt,12,0,0,0,0,12,1.0,1.0,1.0
df,25,0,1,0,0,24,0.97,1.0,0.96
join,18,0,0,0,0,18,1.0,1.0,1.0
expr,33,0,0,1,0,32,0.984848,1.0,0.969697
seq,15,0,0,0,0,15,1.0,1.0,1.0
unexpand,11,0,0,0,0,11,1.0,1.0,1.0


Varnode average compare score [0,1] (metatype=ARRAY)    0.963970
Varnodes fraction partially recovered                   0.970500
Varnodes fraction exactly recovered                     0.958691
dtype: float64

varnodes_metatype_STRUCT (optimization=0, stripped=False, debug=True)


Unnamed: 0,Ground truth varnodes (metatype=STRUCT),Decompiler varnodes matched @ level=NO_MATCH (metatype=STRUCT),Decompiler varnodes matched @ level=OVERLAP (metatype=STRUCT),Decompiler varnodes matched @ level=SUBSET (metatype=STRUCT),Decompiler varnodes matched @ level=ALIGNED (metatype=STRUCT),Decompiler varnodes matched @ level=MATCH (metatype=STRUCT),"Varnode average compare score [0,1] (metatype=STRUCT)",Varnodes fraction partially recovered,Varnodes fraction exactly recovered
stat,32,0,0,0,0,32,1.0,1.0,1.0
nohup,10,0,0,0,0,10,1.0,1.0,1.0
pinky,12,0,0,0,0,12,1.0,1.0,1.0
csplit,56,0,0,0,0,56,1.0,1.0,1.0
fmt,10,0,0,0,0,10,1.0,1.0,1.0
df,37,0,1,0,0,36,0.97973,1.0,0.972973
join,14,0,0,0,0,14,1.0,1.0,1.0
expr,59,0,0,0,0,59,1.0,1.0,1.0
seq,17,0,0,0,0,17,1.0,1.0,1.0
unexpand,10,0,0,0,0,10,1.0,1.0,1.0


Varnode average compare score [0,1] (metatype=STRUCT)    0.998733
Varnodes fraction partially recovered                    1.000000
Varnodes fraction exactly recovered                      0.998311
dtype: float64

varnodes_metatype_UNION (optimization=0, stripped=False, debug=True)


Unnamed: 0,Ground truth varnodes (metatype=UNION),Decompiler varnodes matched @ level=NO_MATCH (metatype=UNION),Decompiler varnodes matched @ level=OVERLAP (metatype=UNION),Decompiler varnodes matched @ level=SUBSET (metatype=UNION),Decompiler varnodes matched @ level=ALIGNED (metatype=UNION),Decompiler varnodes matched @ level=MATCH (metatype=UNION),"Varnode average compare score [0,1] (metatype=UNION)",Varnodes fraction partially recovered,Varnodes fraction exactly recovered
stat,0,0,0,0,0,0,,,
nohup,0,0,0,0,0,0,,,
pinky,0,0,0,0,0,0,,,
csplit,0,0,0,0,0,0,,,
fmt,0,0,0,0,0,0,,,
df,0,0,0,0,0,0,,,
join,0,0,0,0,0,0,,,
expr,0,0,0,0,0,0,,,
seq,0,0,0,0,0,0,,,
unexpand,0,0,0,0,0,0,,,


Varnode average compare score [0,1] (metatype=UNION)   NaN
Varnodes fraction partially recovered                  NaN
Varnodes fraction exactly recovered                    NaN
dtype: float64

Unnamed: 0,NO_MATCH,OVERLAP,SUBSET,ALIGNED,MATCH
INT,0.007421,0.00053,0.0,0.00318,0.988868
FLOAT,0.0,0.0,0.0,0.0,1.0
POINTER,0.009042,0.0,0.0,0.0,0.990958
ARRAY,0.142857,0.002421,0.009685,0.0,0.845036
STRUCT,0.0,0.002959,0.0,0.0,0.997041


In [63]:
# Build-option sets of interest: the stripped build and the debug build
# (both defined in the setup cell). Nothing else in this cell reads this
# tuple; the loop below iterates over the debug build only.
analyzed_opts_sets = (strip_opts, debug_opts)

# Show every metatype-specific varnode group for the debug build.
for opts in (debug_opts,):
    for grp in varnodes_groups_metatypes:
        # Table for this group under the current build options.
        display(get_table(grp, opts))
        # Average statistics for the group; note the call takes no `opts`.
        # NOTE(review): in the saved output these averages do not equal the
        # column means of the table displayed just above (e.g. for INT the
        # "partially recovered" column is 1.0 on every row, yet the average
        # shown is 0.996609) -- confirm which programs / build options the
        # averages actually cover.
        display(get_varnode_group_average_stats(grp))
    
    # Match-level ratios per metatype for the same build options.
    display(load_table("metatype_match_levels_ratios", opts))
# Disabled alternative view, kept for reference:
# display_analyzed_tables(
#     decomposed_varnodes_groups,
#     (strip_opts,),
#     analyzed_only=True
# )

Unnamed: 0,Ground truth varnodes (metatype=INT),Decompiler varnodes matched @ level=NO_MATCH (metatype=INT),Decompiler varnodes matched @ level=OVERLAP (metatype=INT),Decompiler varnodes matched @ level=SUBSET (metatype=INT),Decompiler varnodes matched @ level=ALIGNED (metatype=INT),Decompiler varnodes matched @ level=MATCH (metatype=INT),"Varnode average compare score [0,1] (metatype=INT)",Varnodes fraction partially recovered,Varnodes fraction exactly recovered
stat,392,0,0,0,0,392,1.0,1.0,1.0
nohup,99,0,0,0,0,99,1.0,1.0,1.0
pinky,96,0,0,0,0,96,1.0,1.0,1.0
csplit,632,0,1,0,6,625,0.99644,1.0,0.988924
fmt,112,0,0,0,0,112,1.0,1.0,1.0
df,327,0,0,0,0,327,1.0,1.0,1.0
join,162,0,0,0,0,162,1.0,1.0,1.0
expr,573,0,1,0,6,566,0.996073,1.0,0.987784
seq,156,0,0,0,0,156,1.0,1.0,1.0
unexpand,101,0,0,0,0,101,1.0,1.0,1.0


Varnode average compare score [0,1] (metatype=INT)    0.996141
Varnodes fraction partially recovered                 0.996609
Varnodes fraction exactly recovered                   0.995153
dtype: float64

Unnamed: 0,Ground truth varnodes (metatype=FLOAT),Decompiler varnodes matched @ level=NO_MATCH (metatype=FLOAT),Decompiler varnodes matched @ level=OVERLAP (metatype=FLOAT),Decompiler varnodes matched @ level=SUBSET (metatype=FLOAT),Decompiler varnodes matched @ level=ALIGNED (metatype=FLOAT),Decompiler varnodes matched @ level=MATCH (metatype=FLOAT),"Varnode average compare score [0,1] (metatype=FLOAT)",Varnodes fraction partially recovered,Varnodes fraction exactly recovered
stat,2,0,0,0,0,2,1.0,1.0,1.0
nohup,0,0,0,0,0,0,,,
pinky,0,0,0,0,0,0,,,
csplit,0,0,0,0,0,0,,,
fmt,0,0,0,0,0,0,,,
df,13,0,0,0,0,13,1.0,1.0,1.0
join,0,0,0,0,0,0,,,
expr,0,0,0,0,0,0,,,
seq,10,0,0,0,0,10,1.0,1.0,1.0
unexpand,0,0,0,0,0,0,,,


Varnode average compare score [0,1] (metatype=FLOAT)    1.0
Varnodes fraction partially recovered                   1.0
Varnodes fraction exactly recovered                     1.0
dtype: float64

Unnamed: 0,Ground truth varnodes (metatype=POINTER),Decompiler varnodes matched @ level=NO_MATCH (metatype=POINTER),Decompiler varnodes matched @ level=OVERLAP (metatype=POINTER),Decompiler varnodes matched @ level=SUBSET (metatype=POINTER),Decompiler varnodes matched @ level=ALIGNED (metatype=POINTER),Decompiler varnodes matched @ level=MATCH (metatype=POINTER),"Varnode average compare score [0,1] (metatype=POINTER)",Varnodes fraction partially recovered,Varnodes fraction exactly recovered
stat,150,0,0,0,0,150,1.0,1.0,1.0
nohup,40,0,0,0,0,40,1.0,1.0,1.0
pinky,57,0,0,0,0,57,1.0,1.0,1.0
csplit,280,0,0,0,0,280,1.0,1.0,1.0
fmt,52,0,0,0,0,52,1.0,1.0,1.0
df,244,0,0,0,0,244,1.0,1.0,1.0
join,66,0,0,0,0,66,1.0,1.0,1.0
expr,267,0,0,0,0,267,1.0,1.0,1.0
seq,81,0,0,0,0,81,1.0,1.0,1.0
unexpand,36,0,0,0,0,36,1.0,1.0,1.0


Varnode average compare score [0,1] (metatype=POINTER)    0.991776
Varnodes fraction partially recovered                     0.991776
Varnodes fraction exactly recovered                       0.991776
dtype: float64

Unnamed: 0,Ground truth varnodes (metatype=ARRAY),Decompiler varnodes matched @ level=NO_MATCH (metatype=ARRAY),Decompiler varnodes matched @ level=OVERLAP (metatype=ARRAY),Decompiler varnodes matched @ level=SUBSET (metatype=ARRAY),Decompiler varnodes matched @ level=ALIGNED (metatype=ARRAY),Decompiler varnodes matched @ level=MATCH (metatype=ARRAY),"Varnode average compare score [0,1] (metatype=ARRAY)",Varnodes fraction partially recovered,Varnodes fraction exactly recovered
stat,32,0,0,1,0,31,0.984375,1.0,0.96875
nohup,13,0,0,0,0,13,1.0,1.0,1.0
pinky,17,0,0,1,0,16,0.970588,1.0,0.941176
csplit,35,0,0,1,0,34,0.985714,1.0,0.971429
fmt,12,0,0,0,0,12,1.0,1.0,1.0
df,25,0,1,0,0,24,0.97,1.0,0.96
join,18,0,0,0,0,18,1.0,1.0,1.0
expr,33,0,0,1,0,32,0.984848,1.0,0.969697
seq,15,0,0,0,0,15,1.0,1.0,1.0
unexpand,11,0,0,0,0,11,1.0,1.0,1.0


Varnode average compare score [0,1] (metatype=ARRAY)    0.963970
Varnodes fraction partially recovered                   0.970500
Varnodes fraction exactly recovered                     0.958691
dtype: float64

Unnamed: 0,Ground truth varnodes (metatype=STRUCT),Decompiler varnodes matched @ level=NO_MATCH (metatype=STRUCT),Decompiler varnodes matched @ level=OVERLAP (metatype=STRUCT),Decompiler varnodes matched @ level=SUBSET (metatype=STRUCT),Decompiler varnodes matched @ level=ALIGNED (metatype=STRUCT),Decompiler varnodes matched @ level=MATCH (metatype=STRUCT),"Varnode average compare score [0,1] (metatype=STRUCT)",Varnodes fraction partially recovered,Varnodes fraction exactly recovered
stat,32,0,0,0,0,32,1.0,1.0,1.0
nohup,10,0,0,0,0,10,1.0,1.0,1.0
pinky,12,0,0,0,0,12,1.0,1.0,1.0
csplit,56,0,0,0,0,56,1.0,1.0,1.0
fmt,10,0,0,0,0,10,1.0,1.0,1.0
df,37,0,1,0,0,36,0.97973,1.0,0.972973
join,14,0,0,0,0,14,1.0,1.0,1.0
expr,59,0,0,0,0,59,1.0,1.0,1.0
seq,17,0,0,0,0,17,1.0,1.0,1.0
unexpand,10,0,0,0,0,10,1.0,1.0,1.0


Varnode average compare score [0,1] (metatype=STRUCT)    0.998733
Varnodes fraction partially recovered                    1.000000
Varnodes fraction exactly recovered                      0.998311
dtype: float64

Unnamed: 0,Ground truth varnodes (metatype=UNION),Decompiler varnodes matched @ level=NO_MATCH (metatype=UNION),Decompiler varnodes matched @ level=OVERLAP (metatype=UNION),Decompiler varnodes matched @ level=SUBSET (metatype=UNION),Decompiler varnodes matched @ level=ALIGNED (metatype=UNION),Decompiler varnodes matched @ level=MATCH (metatype=UNION),"Varnode average compare score [0,1] (metatype=UNION)",Varnodes fraction partially recovered,Varnodes fraction exactly recovered
stat,0,0,0,0,0,0,,,
nohup,0,0,0,0,0,0,,,
pinky,0,0,0,0,0,0,,,
csplit,0,0,0,0,0,0,,,
fmt,0,0,0,0,0,0,,,
df,0,0,0,0,0,0,,,
join,0,0,0,0,0,0,,,
expr,0,0,0,0,0,0,,,
seq,0,0,0,0,0,0,,,
unexpand,0,0,0,0,0,0,,,


Varnode average compare score [0,1] (metatype=UNION)   NaN
Varnodes fraction partially recovered                  NaN
Varnodes fraction exactly recovered                    NaN
dtype: float64

Unnamed: 0,NO_MATCH,OVERLAP,SUBSET,ALIGNED,MATCH
INT,0.001327,0.00128,0.0,0.001896,0.995497
FLOAT,0.0,0.0,0.0,0.0,1.0
POINTER,0.001579,0.0,0.0,0.000105,0.998316
ARRAY,0.026916,0.007755,0.010949,0.0,0.95438
STRUCT,0.0,0.003935,0.0,0.0,0.996065


In [64]:
# Comparison for the `cksum` program under the debug build options.
cmp = load_cmp(prog_from_progname("cksum"), debug_opts)

# Summed get_size() over the ground-truth varnodes and over the missed ones.
# (presumably sizes are in bytes -- TODO confirm)
truth = sum(varnode.get_size() for varnode in varnodes_truth(cmp))
missed = sum(varnode.get_size() for varnode in varnodes_missed(cmp))

# Compare records at the OVERLAP level; within this cell only the disabled
# snippet at the bottom reads them.
overlapped = varnode_compare_records_matched_at_level(cmp, VarnodeCompareLevel.OVERLAP)

# One output line per missed varnode: the name of its parent function.
for varnode in varnodes_missed(cmp):
    print(varnode.get_var().get_parent_function().get_name())

# Disabled: the same listing for the OVERLAP records.
# for record in overlapped:
#     print(record.get_varnode().get_var().get_parent_function().get_name())
# print(overlapped)

cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul
cksum_pclmul