In [1]:
# imports
import setup

# libs
from IPython.display import display
import numpy as np
import pandas as pd

# local
from build_parse import *
from metrics import *

In [2]:
# Notebook display configuration: lift pandas' row cap so that every row of a
# displayed DataFrame is rendered (None means "no limit").
pd.options.display.max_rows = None

In [3]:
# Toy programs to evaluate, wrapped as ToyProgram objects.
prognames = ["ndarray", "typecases", "p0", "structcases"]
progs = list(map(ToyProgram, prognames))

# Alternative program set left by the author (uncomment to use it instead):
# progs = COREUTILS_PROGS

# Names are read back from the program objects so they follow whichever
# `progs` list is active above.
prog_names = []
for prog in progs:
    prog_names.append(prog.get_name())

# Two build configurations: the defaults, and a debug/unstripped variant that
# reuses the default configuration's optimization setting.
opts = BuildOptions()
dwarf_opts = BuildOptions(
    debug=True,
    strip=False,
    optimization=opts.optimization,
)

# Build every program under both configurations (default first, then debug).
# NOTE(review): build_if_not_valid presumably skips work when a valid build
# already exists -- confirm in build_parse.
for prog in progs:
    for build_opts in (opts, dwarf_opts):
        prog.build_if_not_valid(build_opts)

# Metric groups consumed by the cells below, one result table per group.
metrics_groups = make_metrics()

In [4]:
# For each program, run the comparison against the Ghidra decompiler using the
# default build options (the original note describes this as DWARF vs Ghidra).
cmps = []
for prog in progs:
    cmps.append(parse_compare_program(prog, opts, decompiler="ghidra"))

# One DataFrame per metrics group; each is also written to DATA_DIR as
# toy_<group index>.csv before being collected in `dfs`.
dfs = []
for i, metrics_grp in enumerate(metrics_groups):
    df = compute_comparisons_metrics_dataframe(prog_names, cmps, metrics_grp)
    csv_name = "toy_{}.csv".format(i)
    csv_out_path = DATA_DIR.joinpath(csv_name)
    df.to_csv(csv_out_path)
    dfs.append(df)


openjdk version "11.0.17" 2022-10-18
OpenJDK Runtime Environment (build 11.0.17+8-post-Ubuntu-1ubuntu220.04)
OpenJDK 64-Bit Server VM (build 11.0.17+8-post-Ubuntu-1ubuntu220.04, mixed mode)


INFO  Using log config file: jar:file:/home/jacekline/dev/research/ghidra-build/Ghidra/Framework/Generic/lib/Generic.jar!/generic.log4j.xml (LoggingInitialization)  
INFO  Using log file: /home/jacekline/.ghidra/.ghidra_10.2_DEV/application.log (LoggingInitialization)  
INFO  Loading user preferences: /home/jacekline/.ghidra/.ghidra_10.2_DEV/preferences (Preferences)  
INFO  Class search complete (1469 ms) (ClassSearcher)  
INFO  Initializing SSL Context (SSLContextInitializer)  
INFO  Initializing Random Number Generator... (SecureRandomFactory)  
INFO  Random Number Generator initialization complete: NativePRNGNonBlocking (SecureRandomFactory)  
INFO  Trust manager disabled, cacerts have not been set (ApplicationTrustManagerFactory)  
INFO  HEADLESS Script Paths:
    /home/jacekline/dev/research/ghidra-build/Ghidra/Features/BytePatterns/ghidra_scripts
    /home/jacekline/dev/research/ghidra-build/Ghidra/Features/FunctionID/ghidra_scripts
    /home/jacekline/dev/research/ghidra-build/

openjdk version "11.0.17" 2022-10-18
OpenJDK Runtime Environment (build 11.0.17+8-post-Ubuntu-1ubuntu220.04)
OpenJDK 64-Bit Server VM (build 11.0.17+8-post-Ubuntu-1ubuntu220.04, mixed mode)


INFO  Using log config file: jar:file:/home/jacekline/dev/research/ghidra-build/Ghidra/Framework/Generic/lib/Generic.jar!/generic.log4j.xml (LoggingInitialization)  
INFO  Using log file: /home/jacekline/.ghidra/.ghidra_10.2_DEV/application.log (LoggingInitialization)  
INFO  Loading user preferences: /home/jacekline/.ghidra/.ghidra_10.2_DEV/preferences (Preferences)  
INFO  Class search complete (1167 ms) (ClassSearcher)  
INFO  Initializing SSL Context (SSLContextInitializer)  
INFO  Initializing Random Number Generator... (SecureRandomFactory)  
INFO  Random Number Generator initialization complete: NativePRNGNonBlocking (SecureRandomFactory)  
INFO  Trust manager disabled, cacerts have not been set (ApplicationTrustManagerFactory)  
INFO  HEADLESS Script Paths:
    /home/jacekline/dev/research/ghidra-build/Ghidra/Features/BytePatterns/ghidra_scripts
    /home/jacekline/dev/research/ghidra-build/Ghidra/Features/FunctionID/ghidra_scripts
    /home/jacekline/dev/research/ghidra-build/

openjdk version "11.0.17" 2022-10-18
OpenJDK Runtime Environment (build 11.0.17+8-post-Ubuntu-1ubuntu220.04)
OpenJDK 64-Bit Server VM (build 11.0.17+8-post-Ubuntu-1ubuntu220.04, mixed mode)


INFO  Using log config file: jar:file:/home/jacekline/dev/research/ghidra-build/Ghidra/Framework/Generic/lib/Generic.jar!/generic.log4j.xml (LoggingInitialization)  
INFO  Using log file: /home/jacekline/.ghidra/.ghidra_10.2_DEV/application.log (LoggingInitialization)  
INFO  Loading user preferences: /home/jacekline/.ghidra/.ghidra_10.2_DEV/preferences (Preferences)  
INFO  Class search complete (1147 ms) (ClassSearcher)  
INFO  Initializing SSL Context (SSLContextInitializer)  
INFO  Initializing Random Number Generator... (SecureRandomFactory)  
INFO  Random Number Generator initialization complete: NativePRNGNonBlocking (SecureRandomFactory)  
INFO  Trust manager disabled, cacerts have not been set (ApplicationTrustManagerFactory)  
INFO  HEADLESS Script Paths:
    /home/jacekline/dev/research/ghidra-build/Ghidra/Features/BytePatterns/ghidra_scripts
    /home/jacekline/dev/research/ghidra-build/Ghidra/Features/FunctionID/ghidra_scripts
    /home/jacekline/dev/research/ghidra-build/

openjdk version "11.0.17" 2022-10-18
OpenJDK Runtime Environment (build 11.0.17+8-post-Ubuntu-1ubuntu220.04)
OpenJDK 64-Bit Server VM (build 11.0.17+8-post-Ubuntu-1ubuntu220.04, mixed mode)


INFO  Using log config file: jar:file:/home/jacekline/dev/research/ghidra-build/Ghidra/Framework/Generic/lib/Generic.jar!/generic.log4j.xml (LoggingInitialization)  
INFO  Using log file: /home/jacekline/.ghidra/.ghidra_10.2_DEV/application.log (LoggingInitialization)  
INFO  Loading user preferences: /home/jacekline/.ghidra/.ghidra_10.2_DEV/preferences (Preferences)  
INFO  Class search complete (1233 ms) (ClassSearcher)  
INFO  Initializing SSL Context (SSLContextInitializer)  
INFO  Initializing Random Number Generator... (SecureRandomFactory)  
INFO  Random Number Generator initialization complete: NativePRNGNonBlocking (SecureRandomFactory)  
INFO  Trust manager disabled, cacerts have not been set (ApplicationTrustManagerFactory)  
INFO  HEADLESS Script Paths:
    /home/jacekline/dev/research/ghidra-build/Ghidra/Features/BytePatterns/ghidra_scripts
    /home/jacekline/dev/research/ghidra-build/Ghidra/Features/FunctionID/ghidra_scripts
    /home/jacekline/dev/research/ghidra-build/

In [5]:
# Show each metrics table under a dashed banner carrying the group's name.
for metrics_grp, df in zip(metrics_groups, dfs):
    rule = "-" * 10
    title = "{} {} {}".format(rule, metrics_grp.get_name(), rule)
    print(title)
    display(df)

---------- DATA BYTES ----------


Unnamed: 0,Ground truth data bytes,Overlapped data bytes,Missed data bytes
ndarray,4440,4440,0
typecases,584,572,12
p0,61,61,0
structcases,80,67,13


---------- FUNCTIONS ----------


Unnamed: 0,Ground truth functions,Found functions,Missed functions
ndarray,1,1,0
typecases,4,4,0
p0,3,3,0
structcases,1,1,0


---------- VARNODES ----------


Unnamed: 0,Ground truth varnodes,Varnodes matched @ level=OVERLAP,Varnodes matched @ level=SUBSET,Varnodes matched @ level=ALIGNED,Varnodes matched @ level=MATCH,Missed varnodes,"Varnode average comparison score [0,1]"
ndarray,3,2,0,0,1,0,0.5
typecases,15,3,2,8,2,0,0.65
p0,7,0,0,2,5,0,0.928571
structcases,6,0,3,2,1,0,0.666667


---------- VARNODES (metatype = INT) ----------


Unnamed: 0,Ground truth varnodes w/ metatype=INT,Missed varnodes w/ metatype=INT,Decompiler varnodes w/ metatype=INT matched @ level=OVERLAP,Decompiler varnodes w/ metatype=INT matched @ level=SUBSET,Decompiler varnodes w/ metatype=INT matched @ level=ALIGNED,Decompiler varnodes w/ metatype=INT matched @ level=MATCH,"Varnode average compare score [0,1] w/ metatype=INT"
ndarray,0,0,0,0,0,0,
typecases,8,0,0,0,6,2,0.8125
p0,6,0,0,0,2,4,0.916667
structcases,0,0,0,0,0,0,


---------- VARNODES (metatype = FLOAT) ----------


Unnamed: 0,Ground truth varnodes w/ metatype=FLOAT,Missed varnodes w/ metatype=FLOAT,Decompiler varnodes w/ metatype=FLOAT matched @ level=OVERLAP,Decompiler varnodes w/ metatype=FLOAT matched @ level=SUBSET,Decompiler varnodes w/ metatype=FLOAT matched @ level=ALIGNED,Decompiler varnodes w/ metatype=FLOAT matched @ level=MATCH,"Varnode average compare score [0,1] w/ metatype=FLOAT"
ndarray,0,0,0,0,0,0,
typecases,0,0,0,0,0,0,
p0,0,0,0,0,0,0,
structcases,0,0,0,0,0,0,


---------- VARNODES (metatype = POINTER) ----------


Unnamed: 0,Ground truth varnodes w/ metatype=POINTER,Missed varnodes w/ metatype=POINTER,Decompiler varnodes w/ metatype=POINTER matched @ level=OVERLAP,Decompiler varnodes w/ metatype=POINTER matched @ level=SUBSET,Decompiler varnodes w/ metatype=POINTER matched @ level=ALIGNED,Decompiler varnodes w/ metatype=POINTER matched @ level=MATCH,"Varnode average compare score [0,1] w/ metatype=POINTER"
ndarray,0,0,0,0,0,0,
typecases,1,0,0,0,1,0,0.75
p0,0,0,0,0,0,0,
structcases,1,0,0,0,0,1,1.0


---------- VARNODES (metatype = ARRAY) ----------


Unnamed: 0,Ground truth varnodes w/ metatype=ARRAY,Missed varnodes w/ metatype=ARRAY,Decompiler varnodes w/ metatype=ARRAY matched @ level=OVERLAP,Decompiler varnodes w/ metatype=ARRAY matched @ level=SUBSET,Decompiler varnodes w/ metatype=ARRAY matched @ level=ALIGNED,Decompiler varnodes w/ metatype=ARRAY matched @ level=MATCH,"Varnode average compare score [0,1] w/ metatype=ARRAY"
ndarray,3,0,2,0,0,1,0.5
typecases,2,0,2,0,0,0,0.25
p0,1,0,0,0,0,1,1.0
structcases,0,0,0,0,0,0,


---------- VARNODES (metatype = STRUCT) ----------


Unnamed: 0,Ground truth varnodes w/ metatype=STRUCT,Missed varnodes w/ metatype=STRUCT,Decompiler varnodes w/ metatype=STRUCT matched @ level=OVERLAP,Decompiler varnodes w/ metatype=STRUCT matched @ level=SUBSET,Decompiler varnodes w/ metatype=STRUCT matched @ level=ALIGNED,Decompiler varnodes w/ metatype=STRUCT matched @ level=MATCH,"Varnode average compare score [0,1] w/ metatype=STRUCT"
ndarray,0,0,0,0,0,0,
typecases,3,0,1,2,0,0,0.416667
p0,0,0,0,0,0,0,
structcases,3,0,0,3,0,0,0.5


---------- VARNODES (metatype = UNION) ----------


Unnamed: 0,Ground truth varnodes w/ metatype=UNION,Missed varnodes w/ metatype=UNION,Decompiler varnodes w/ metatype=UNION matched @ level=OVERLAP,Decompiler varnodes w/ metatype=UNION matched @ level=SUBSET,Decompiler varnodes w/ metatype=UNION matched @ level=ALIGNED,Decompiler varnodes w/ metatype=UNION matched @ level=MATCH,"Varnode average compare score [0,1] w/ metatype=UNION"
ndarray,0,0,0,0,0,0,
typecases,1,0,0,0,1,0,0.75
p0,0,0,0,0,0,0,
structcases,2,0,0,0,2,0,0.75


---------- VARNODES (metatype = UNDEFINED) ----------


Unnamed: 0,Ground truth varnodes w/ metatype=UNDEFINED,Missed varnodes w/ metatype=UNDEFINED,Decompiler varnodes w/ metatype=UNDEFINED matched @ level=OVERLAP,Decompiler varnodes w/ metatype=UNDEFINED matched @ level=SUBSET,Decompiler varnodes w/ metatype=UNDEFINED matched @ level=ALIGNED,Decompiler varnodes w/ metatype=UNDEFINED matched @ level=MATCH,"Varnode average compare score [0,1] w/ metatype=UNDEFINED"
ndarray,0,0,0,0,0,0,
typecases,0,0,0,0,0,0,
p0,0,0,0,0,0,0,
structcases,0,0,0,0,0,0,


---------- PRIMITIVE VARNODES ----------


Unnamed: 0,Ground truth decomposed (primitive) varnodes,Decomposed (primitive) varnodes matched @ level=OVERLAP,Decomposed (primitive) varnodes matched @ level=SUBSET,Decomposed (primitive) varnodes matched @ level=ALIGNED,Decomposed (primitive) varnodes matched @ level=MATCH,Missed decomposed (primitive) varnodes,"Decomposed (primitive) varnode average comparison score [0,1]"
ndarray,1110,1110,0,0,0,0,0.25
typecases,132,1,0,14,117,0,0.967803
p0,16,0,0,2,14,0,0.96875
structcases,13,1,0,8,4,0,0.788462


---------- PRIMITIVE VARNODES (metatype = INT) ----------


Unnamed: 0,Ground truth decomposed (primitive) varnodes w/ metatype=INT,Missed decomposed (primitive) varnodes w/ metatype=INT,Decompiler decomposed (primitive) varnodes w/ metatype=INT matched @ level=OVERLAP,Decompiler decomposed (primitive) varnodes w/ metatype=INT matched @ level=SUBSET,Decompiler decomposed (primitive) varnodes w/ metatype=INT matched @ level=ALIGNED,Decompiler decomposed (primitive) varnodes w/ metatype=INT matched @ level=MATCH,"Decomposed (primitive) varnode average compare score [0,1] w/ metatype=INT"
ndarray,1110,0,1110,0,0,0,0.25
typecases,127,0,0,0,12,115,0.976378
p0,16,0,0,0,2,14,0.96875
structcases,7,0,0,0,7,0,0.75


---------- PRIMITIVE VARNODES (metatype = FLOAT) ----------


Unnamed: 0,Ground truth decomposed (primitive) varnodes w/ metatype=FLOAT,Missed decomposed (primitive) varnodes w/ metatype=FLOAT,Decompiler decomposed (primitive) varnodes w/ metatype=FLOAT matched @ level=OVERLAP,Decompiler decomposed (primitive) varnodes w/ metatype=FLOAT matched @ level=SUBSET,Decompiler decomposed (primitive) varnodes w/ metatype=FLOAT matched @ level=ALIGNED,Decompiler decomposed (primitive) varnodes w/ metatype=FLOAT matched @ level=MATCH,"Decomposed (primitive) varnode average compare score [0,1] w/ metatype=FLOAT"
ndarray,0,0,0,0,0,0,
typecases,0,0,0,0,0,0,
p0,0,0,0,0,0,0,
structcases,0,0,0,0,0,0,


---------- PRIMITIVE VARNODES (metatype = POINTER) ----------


Unnamed: 0,Ground truth decomposed (primitive) varnodes w/ metatype=POINTER,Missed decomposed (primitive) varnodes w/ metatype=POINTER,Decompiler decomposed (primitive) varnodes w/ metatype=POINTER matched @ level=OVERLAP,Decompiler decomposed (primitive) varnodes w/ metatype=POINTER matched @ level=SUBSET,Decompiler decomposed (primitive) varnodes w/ metatype=POINTER matched @ level=ALIGNED,Decompiler decomposed (primitive) varnodes w/ metatype=POINTER matched @ level=MATCH,"Decomposed (primitive) varnode average compare score [0,1] w/ metatype=POINTER"
ndarray,0,0,0,0,0,0,
typecases,4,0,0,0,2,2,0.875
p0,0,0,0,0,0,0,
structcases,4,0,0,0,0,4,1.0


---------- PRIMITIVE VARNODES (metatype = UNDEFINED) ----------


Unnamed: 0,Ground truth decomposed (primitive) varnodes w/ metatype=UNDEFINED,Missed decomposed (primitive) varnodes w/ metatype=UNDEFINED,Decompiler decomposed (primitive) varnodes w/ metatype=UNDEFINED matched @ level=OVERLAP,Decompiler decomposed (primitive) varnodes w/ metatype=UNDEFINED matched @ level=SUBSET,Decompiler decomposed (primitive) varnodes w/ metatype=UNDEFINED matched @ level=ALIGNED,Decompiler decomposed (primitive) varnodes w/ metatype=UNDEFINED matched @ level=MATCH,"Decomposed (primitive) varnode average compare score [0,1] w/ metatype=UNDEFINED"
ndarray,0,0,0,0,0,0,
typecases,1,0,1,0,0,0,0.25
p0,0,0,0,0,0,0,
structcases,2,0,1,0,1,0,0.5


---------- ARRAY COMPARISONS ----------


Unnamed: 0,Array comparisons,Array length average difference (ground truth length - decompiler length),Array length average absolute error ratio (length error / ground truth length),Array size average difference (ground truth size - decompiler size),Array size average absolute error ratio (size error / ground truth size),"Dimension match ratio [0,1]","Array comparison average element type comparison score [0,1]"
ndarray,2,274.5,0.4995,-4.0,0.001,0.0,0.666667
typecases,2,-2.0,0.11,-8.0,0.11,0.5,1.0
p0,1,0.0,0.0,0.0,0.0,1.0,1.0
structcases,0,,,,,,
