In [1]:
# imports

# libs
from IPython.display import display
import numpy as np
import pandas as pd

# local
from build_parse import *
from metrics import *

In [2]:
# Notebook display configuration: a max_rows of None removes the row limit,
# so every DataFrame shown below is rendered in full rather than truncated.
pd.options.display.max_rows = None

In [3]:
# Programs under analysis. To run against the toy programs instead, replace
# the `progs` assignment below with:
#   prognames = [ "ndarray", "typecases", "p0", "structcases" ]
#   progs = [ ToyProgram(progname) for progname in prognames ]
progs = COREUTILS_PROGS
prog_names = [ prog.get_name() for prog in progs ]

# Two build configurations per program: the default options, and a variant with
# debug=True / strip=False at the same optimization level (presumably the build
# that carries the DWARF ground truth -- confirm against build_parse).
opts = BuildOptions()
dwarf_opts = BuildOptions(debug=True, strip=False, optimization=opts.optimization)
for prog in progs:
    prog.build_if_not_valid(opts)
    prog.build_if_not_valid(dwarf_opts)

metrics_groups = make_metrics()

# Directory that receives the CSV exports written later in the notebook.
# NOTE(review): assumes CODEDIR is a pathlib.Path -- confirm.
# exist_ok=True replaces the racy "check exists, then mkdir" sequence, and
# parents=True also creates any missing ancestor directories.
datadir = CODEDIR.joinpath("data")
datadir.mkdir(parents=True, exist_ok=True)

In [4]:
# Run the DWARF vs Ghidra comparison once per program, using the default
# build options, keeping the results in the same order as `progs`.
cmps = []
for prog in progs:
    cmps.append(parse_compare_program(prog, opts, decompiler="ghidra"))

# Build one dataframe per metrics group; each is also written to
# <datadir>/<group index>.csv and collected in `dfs` (same order as
# `metrics_groups`).
dfs = []
for grp_idx, grp in enumerate(metrics_groups):
    grp_df = compute_comparisons_metrics_dataframe(prog_names, cmps, grp)
    grp_df.to_csv(datadir.joinpath("%d.csv" % grp_idx))
    dfs.append(grp_df)


In [5]:
# Show every metrics dataframe under a banner carrying its group name.
rule = "-" * 10
for metrics_grp, df in zip(metrics_groups, dfs):
    banner = "{} {} {}".format(rule, metrics_grp.get_name(), rule)
    print(banner)
    display(df)

---------- DATA BYTES ----------


Unnamed: 0,Ground truth data bytes,Overlapped data bytes,Missed data bytes
[,4463,4320,143
b2sum,4472,4304,168
base32,2671,2548,123
base64,2699,2608,91
basename,2193,2102,91
basenc,3793,3423,370
cat,2644,2553,91
chcon,20631,20496,135
chgrp,4803,4651,152
chmod,4776,4602,174


---------- FUNCTIONS ----------


Unnamed: 0,Ground truth functions,Found functions,Missed functions
[,152,152,0
b2sum,148,148,0
base32,128,128,0
base64,129,129,0
basename,111,111,0
basenc,171,171,0
cat,124,124,0
chcon,247,247,0
chgrp,216,216,0
chmod,214,214,0


---------- VARNODES ----------


Unnamed: 0,Ground truth varnodes,Varnodes matched @ level=OVERLAP,Varnodes matched @ level=SUBSET,Varnodes matched @ level=ALIGNED,Varnodes matched @ level=MATCH,Missed varnodes,"Varnode average comparison score [0,1]"
[,266,18,5,128,114,1,0.815789
b2sum,237,8,7,118,103,1,0.831224
base32,160,6,2,82,68,2,0.825
base64,160,6,2,82,69,1,0.83125
basename,129,6,2,77,43,1,0.800388
basenc,219,8,2,106,98,5,0.824201
cat,164,6,2,98,57,1,0.810976
chcon,363,14,4,234,109,2,0.798898
chgrp,339,11,5,204,117,2,0.811947
chmod,347,12,7,211,114,3,0.803314


---------- VARNODES (metatype = INT) ----------


Unnamed: 0,Ground truth varnodes w/ metatype=INT,Missed varnodes w/ metatype=INT,Decompiler varnodes w/ metatype=INT matched @ level=OVERLAP,Decompiler varnodes w/ metatype=INT matched @ level=SUBSET,Decompiler varnodes w/ metatype=INT matched @ level=ALIGNED,Decompiler varnodes w/ metatype=INT matched @ level=MATCH,"Varnode average compare score [0,1] w/ metatype=INT"
[,157,0,0,0,83,74,0.867834
b2sum,147,0,0,0,80,67,0.863946
base32,96,0,0,0,51,45,0.867188
base64,96,0,0,0,51,45,0.867188
basename,71,0,0,0,47,24,0.834507
basenc,133,0,1,0,70,62,0.862782
cat,101,0,0,0,64,37,0.841584
chcon,185,0,0,0,124,61,0.832432
chgrp,166,0,0,0,102,64,0.846386
chmod,176,0,0,0,110,66,0.84375


---------- VARNODES (metatype = FLOAT) ----------


Unnamed: 0,Ground truth varnodes w/ metatype=FLOAT,Missed varnodes w/ metatype=FLOAT,Decompiler varnodes w/ metatype=FLOAT matched @ level=OVERLAP,Decompiler varnodes w/ metatype=FLOAT matched @ level=SUBSET,Decompiler varnodes w/ metatype=FLOAT matched @ level=ALIGNED,Decompiler varnodes w/ metatype=FLOAT matched @ level=MATCH,"Varnode average compare score [0,1] w/ metatype=FLOAT"
[,2,0,1,0,1,0,0.5
b2sum,0,0,0,0,0,0,
base32,0,0,0,0,0,0,
base64,0,0,0,0,0,0,
basename,0,0,0,0,0,0,
basenc,0,0,0,0,0,0,
cat,0,0,0,0,0,0,
chcon,3,0,0,0,3,0,0.75
chgrp,3,0,0,0,3,0,0.75
chmod,3,0,0,0,3,0,0.75


---------- VARNODES (metatype = POINTER) ----------


Unnamed: 0,Ground truth varnodes w/ metatype=POINTER,Missed varnodes w/ metatype=POINTER,Decompiler varnodes w/ metatype=POINTER matched @ level=OVERLAP,Decompiler varnodes w/ metatype=POINTER matched @ level=SUBSET,Decompiler varnodes w/ metatype=POINTER matched @ level=ALIGNED,Decompiler varnodes w/ metatype=POINTER matched @ level=MATCH,"Varnode average compare score [0,1] w/ metatype=POINTER"
[,68,0,0,0,38,30,0.860294
b2sum,56,0,0,0,32,24,0.857143
base32,41,0,0,0,24,17,0.853659
base64,41,0,0,0,24,17,0.853659
basename,38,0,0,0,24,14,0.842105
basenc,55,0,0,0,30,25,0.863636
cat,41,0,0,0,28,13,0.829268
chcon,134,1,0,0,93,40,0.81903
chgrp,133,1,0,0,90,42,0.823308
chmod,127,1,0,0,89,37,0.816929


---------- VARNODES (metatype = ARRAY) ----------


Unnamed: 0,Ground truth varnodes w/ metatype=ARRAY,Missed varnodes w/ metatype=ARRAY,Decompiler varnodes w/ metatype=ARRAY matched @ level=OVERLAP,Decompiler varnodes w/ metatype=ARRAY matched @ level=SUBSET,Decompiler varnodes w/ metatype=ARRAY matched @ level=ALIGNED,Decompiler varnodes w/ metatype=ARRAY matched @ level=MATCH,"Varnode average compare score [0,1] w/ metatype=ARRAY"
[,15,1,5,3,0,6,0.583333
b2sum,24,1,4,7,0,12,0.6875
base32,12,2,2,2,0,6,0.625
base64,12,1,2,2,0,7,0.708333
basename,10,1,2,2,0,5,0.65
basenc,20,5,2,2,0,11,0.625
cat,11,1,2,2,0,6,0.681818
chcon,18,1,10,2,0,5,0.472222
chgrp,15,1,7,2,0,5,0.516667
chmod,19,2,8,4,0,5,0.473684


---------- VARNODES (metatype = STRUCT) ----------


Unnamed: 0,Ground truth varnodes w/ metatype=STRUCT,Missed varnodes w/ metatype=STRUCT,Decompiler varnodes w/ metatype=STRUCT matched @ level=OVERLAP,Decompiler varnodes w/ metatype=STRUCT matched @ level=SUBSET,Decompiler varnodes w/ metatype=STRUCT matched @ level=ALIGNED,Decompiler varnodes w/ metatype=STRUCT matched @ level=MATCH,"Varnode average compare score [0,1] w/ metatype=STRUCT"
[,24,0,12,2,6,4,0.520833
b2sum,10,0,4,0,6,0,0.55
base32,11,0,4,0,7,0,0.568182
base64,11,0,4,0,7,0,0.568182
basename,10,0,4,0,6,0,0.55
basenc,11,0,5,0,6,0,0.522727
cat,11,0,4,0,6,1,0.590909
chcon,23,0,4,2,14,3,0.673913
chgrp,22,0,4,3,9,6,0.693182
chmod,22,0,4,3,9,6,0.693182


---------- VARNODES (metatype = UNION) ----------


Unnamed: 0,Ground truth varnodes w/ metatype=UNION,Missed varnodes w/ metatype=UNION,Decompiler varnodes w/ metatype=UNION matched @ level=OVERLAP,Decompiler varnodes w/ metatype=UNION matched @ level=SUBSET,Decompiler varnodes w/ metatype=UNION matched @ level=ALIGNED,Decompiler varnodes w/ metatype=UNION matched @ level=MATCH,"Varnode average compare score [0,1] w/ metatype=UNION"
[,0,0,0,0,0,0,
b2sum,0,0,0,0,0,0,
base32,0,0,0,0,0,0,
base64,0,0,0,0,0,0,
basename,0,0,0,0,0,0,
basenc,0,0,0,0,0,0,
cat,0,0,0,0,0,0,
chcon,0,0,0,0,0,0,
chgrp,0,0,0,0,0,0,
chmod,0,0,0,0,0,0,


---------- VARNODES (metatype = UNDEFINED) ----------


Unnamed: 0,Ground truth varnodes w/ metatype=UNDEFINED,Missed varnodes w/ metatype=UNDEFINED,Decompiler varnodes w/ metatype=UNDEFINED matched @ level=OVERLAP,Decompiler varnodes w/ metatype=UNDEFINED matched @ level=SUBSET,Decompiler varnodes w/ metatype=UNDEFINED matched @ level=ALIGNED,Decompiler varnodes w/ metatype=UNDEFINED matched @ level=MATCH,"Varnode average compare score [0,1] w/ metatype=UNDEFINED"
[,0,0,0,0,0,0,
b2sum,0,0,0,0,0,0,
base32,0,0,0,0,0,0,
base64,0,0,0,0,0,0,
basename,0,0,0,0,0,0,
basenc,0,0,0,0,0,0,
cat,0,0,0,0,0,0,
chcon,0,0,0,0,0,0,
chgrp,0,0,0,0,0,0,
chmod,0,0,0,0,0,0,


---------- PRIMITIVE VARNODES ----------


Unnamed: 0,Ground truth decomposed (primitive) varnodes,Decomposed (primitive) varnodes matched @ level=OVERLAP,Decomposed (primitive) varnodes matched @ level=SUBSET,Decomposed (primitive) varnodes matched @ level=ALIGNED,Decomposed (primitive) varnodes matched @ level=MATCH,Missed decomposed (primitive) varnodes,"Decomposed (primitive) varnode average comparison score [0,1]"
[,1190,152,0,484,507,47,0.763025
b2sum,1954,277,0,1234,396,47,0.711745
base32,1169,145,0,609,336,79,0.709153
base64,1197,141,0,673,336,47,0.73183
basename,844,137,0,349,311,47,0.719194
basenc,1847,171,0,1003,365,308,0.628045
cat,933,153,0,390,343,47,0.722133
chcon,17348,180,0,16645,474,49,0.749524
chgrp,1264,171,0,539,504,50,0.752373
chmod,1305,163,0,543,529,70,0.748659


---------- PRIMITIVE VARNODES (metatype = INT) ----------


Unnamed: 0,Ground truth decomposed (primitive) varnodes w/ metatype=INT,Missed decomposed (primitive) varnodes w/ metatype=INT,Decompiler decomposed (primitive) varnodes w/ metatype=INT matched @ level=OVERLAP,Decompiler decomposed (primitive) varnodes w/ metatype=INT matched @ level=SUBSET,Decompiler decomposed (primitive) varnodes w/ metatype=INT matched @ level=ALIGNED,Decompiler decomposed (primitive) varnodes w/ metatype=INT matched @ level=MATCH,"Decomposed (primitive) varnode average compare score [0,1] w/ metatype=INT"
[,1010,47,123,0,424,416,0.757178
b2sum,1813,47,228,0,1180,358,0.717044
base32,1061,79,112,0,565,305,0.713242
base64,1089,47,108,0,629,305,0.738062
basename,739,47,104,0,305,283,0.727673
basenc,1707,308,121,0,952,326,0.626977
cat,817,47,112,0,342,316,0.735006
chcon,17106,47,123,0,16523,413,0.75038
chgrp,1026,47,120,0,418,441,0.76462
chmod,1080,68,116,0,425,471,0.758102


---------- PRIMITIVE VARNODES (metatype = FLOAT) ----------


Unnamed: 0,Ground truth decomposed (primitive) varnodes w/ metatype=FLOAT,Missed decomposed (primitive) varnodes w/ metatype=FLOAT,Decompiler decomposed (primitive) varnodes w/ metatype=FLOAT matched @ level=OVERLAP,Decompiler decomposed (primitive) varnodes w/ metatype=FLOAT matched @ level=SUBSET,Decompiler decomposed (primitive) varnodes w/ metatype=FLOAT matched @ level=ALIGNED,Decompiler decomposed (primitive) varnodes w/ metatype=FLOAT matched @ level=MATCH,"Decomposed (primitive) varnode average compare score [0,1] w/ metatype=FLOAT"
[,2,0,1,0,1,0,0.5
b2sum,0,0,0,0,0,0,
base32,0,0,0,0,0,0,
base64,0,0,0,0,0,0,
basename,0,0,0,0,0,0,
basenc,0,0,0,0,0,0,
cat,0,0,0,0,0,0,
chcon,7,0,4,0,3,0,0.464286
chgrp,7,0,4,0,3,0,0.464286
chmod,7,0,4,0,3,0,0.464286


---------- PRIMITIVE VARNODES (metatype = POINTER) ----------


Unnamed: 0,Ground truth decomposed (primitive) varnodes w/ metatype=POINTER,Missed decomposed (primitive) varnodes w/ metatype=POINTER,Decompiler decomposed (primitive) varnodes w/ metatype=POINTER matched @ level=OVERLAP,Decompiler decomposed (primitive) varnodes w/ metatype=POINTER matched @ level=SUBSET,Decompiler decomposed (primitive) varnodes w/ metatype=POINTER matched @ level=ALIGNED,Decompiler decomposed (primitive) varnodes w/ metatype=POINTER matched @ level=MATCH,"Decomposed (primitive) varnode average compare score [0,1] w/ metatype=POINTER"
[,170,0,20,0,59,91,0.825
b2sum,140,0,48,0,54,38,0.646429
base32,107,0,32,0,44,31,0.672897
base64,107,0,32,0,44,31,0.672897
basename,104,0,32,0,44,28,0.663462
basenc,138,0,48,0,51,39,0.646739
cat,115,0,40,0,48,27,0.634783
chcon,234,2,52,0,119,61,0.69765
chgrp,230,3,46,0,118,63,0.708696
chmod,217,2,42,0,115,58,0.713134


---------- PRIMITIVE VARNODES (metatype = UNDEFINED) ----------


Unnamed: 0,Ground truth decomposed (primitive) varnodes w/ metatype=UNDEFINED,Missed decomposed (primitive) varnodes w/ metatype=UNDEFINED,Decompiler decomposed (primitive) varnodes w/ metatype=UNDEFINED matched @ level=OVERLAP,Decompiler decomposed (primitive) varnodes w/ metatype=UNDEFINED matched @ level=SUBSET,Decompiler decomposed (primitive) varnodes w/ metatype=UNDEFINED matched @ level=ALIGNED,Decompiler decomposed (primitive) varnodes w/ metatype=UNDEFINED matched @ level=MATCH,"Decomposed (primitive) varnode average compare score [0,1] w/ metatype=UNDEFINED"
[,8,0,8,0,0,0,0.25
b2sum,1,0,1,0,0,0,0.25
base32,1,0,1,0,0,0,0.25
base64,1,0,1,0,0,0,0.25
basename,1,0,1,0,0,0,0.25
basenc,2,0,2,0,0,0,0.25
cat,1,0,1,0,0,0,0.25
chcon,1,0,1,0,0,0,0.25
chgrp,1,0,1,0,0,0,0.25
chmod,1,0,1,0,0,0,0.25


---------- ARRAY COMPARISONS ----------


Unnamed: 0,Array comparisons,Array length average difference (ground truth length - decompiler length),Array length average absolute error ratio (length error / ground truth length),Array size average difference (ground truth size - decompiler size),Array size average absolute error ratio (size error / ground truth size),"Dimension match ratio [0,1]","Array comparison average element type comparison score [0,1]"
[,10,-15.0,1.37939,-2.9,0.07939,1.0,0.766667
b2sum,30,-37.966667,21.879241,27.1,0.400075,0.966667,0.377778
base32,8,-37.625,5.140905,-1.875,0.015905,1.0,0.666667
base64,9,-33.444444,4.569693,-1.666667,0.014137,1.0,0.666667
basename,7,-43.0,5.87532,-2.142857,0.018177,1.0,0.666667
basenc,13,-42.230769,3.163634,-1.153846,0.009787,1.0,0.692308
cat,8,-53.125,5.140905,-1.875,0.015905,1.0,0.666667
chcon,12,-49.083333,3.448764,-4.583333,0.032098,1.0,0.694444
chgrp,9,-58.222222,4.601439,-2.333333,0.045884,1.0,0.666667
chmod,10,-46.4,4.21657,-2.3,0.11657,1.0,0.7
