In [3]:
import json
import os
import subprocess
import shlex
import requests
import copy
from collections import defaultdict
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import multiprocessing
from multiprocessing.managers import BaseManager, DictProxy
import importlib
import requests
import sys
from arango import ArangoClient
from arango.http import DefaultHTTPClient

# VP Checker Accuracy: Section 7.4 Table 1
In this notebook, we present how to re-generate the numbers that populate Table 1. 

## Claim
In Table 1, we report the reduction in the number of vulnerable exported functions for the nine most vulnerable libraries in our dataset after recompiling them with LLVM and resolving indirect call targets using an LLVM pass that matches the signatures of indirect call sites to candidate targets. We first measure how many exported functions have a path to at least one CVE in the over-approximated sysfilter graph, then apply the LLVM pass to rewrite indirect calls in those nine libraries and repeat the measurement. Because the graph is large and dense (~16 million nodes and ~70 million edges), this script is compute-intensive.

### Regenerating the table numbers
The script `scripts/get_vuln_exported.py` was used to generate `data/vuln_paths_llvm.json` and `data/vuln_paths_sysfilter.json`. We ran it on a 48-core 372 GB server for several hours to produce:
1. `data/vuln_paths_sysfilter.json` - measurements using Sysfilter's over-approximated call graph (the graph VPChecker uses for all evaluations).
2. `data/vuln_paths_llvm.json` - measurements using LLVM's more precise call graph.

### Running
Run the cells below sequentially to reproduce the table in the final cell.

### Create Connections with ArangoDB
Ensure that ArangoDB has been installed and the service is available on port 8529. For more details, refer to the installation and DB restoration instructions in the `../../../infrastructure/arangodb/` directory.

In [4]:
# Connection settings for the ArangoDB instance that is queried below.
# Every value can be overridden through an environment variable so that
# credentials do not have to be edited into the notebook; the defaults are
# exactly the values this cell previously hard-coded.
ARANGO_HOST = os.environ.get('ARANGO_HOST', 'http://localhost:8529')
ARANGO_DB_NAME = os.environ.get('ARANGO_DB', 'sysfilter')
ARANGO_USERNAME = os.environ.get('ARANGO_USERNAME', 'root')
ARANGO_PASSWORD = os.environ.get('ARANGO_PASSWORD', 'root')
# Per-request timeout handed to DefaultHTTPClient (seconds, per the
# python-arango documentation — confirm against the installed version).
ARANGO_REQUEST_TIMEOUT = 10000

global_client = ArangoClient(
    hosts=ARANGO_HOST,
    http_client=DefaultHTTPClient(request_timeout=ARANGO_REQUEST_TIMEOUT),
)
global_db = global_client.db(
    ARANGO_DB_NAME,
    username=ARANGO_USERNAME,
    password=ARANGO_PASSWORD,
)

## Importing Vulnerable Exported functions data

In [12]:
# Load the pre-computed measurements for both call graphs. The context
# managers close each file handle as soon as its JSON has been parsed,
# instead of leaving the handle open until garbage collection.
with open("./data/vuln_paths_llvm.json") as llvm_file:
    vuln_paths_llvm = json.load(llvm_file)
with open("./data/vuln_paths_sysfilter.json") as sysfilter_file:
    vuln_paths_reg = json.load(sysfilter_file)

#### CVEs reachable from all exported functions of libxml2 when using the more precise LLVM call graph

In [19]:
# Display the value stored under 'CVES_REACHED' for the libxml2 entry of the LLVM measurements.
vuln_paths_llvm['libxml2.so.2@libxml2_2.12.7%2Bdfsg-3']['CVES_REACHED']

['CVE-2023-39615',
 'CVE-2022-29824',
 'CVE-2022-40303',
 'CVE-2023-45322',
 'CVE-2022-23308',
 'CVE-2023-28484',
 'CVE-2022-2309',
 'CVE-2022-40304',
 'CVE-2024-25062']

#### CVEs reachable from all exported functions of libxml2 when using the over-approximated sysfilter call graph

In [21]:
# Display the value stored under 'CVES_REACHED' for the libxml2 entry of the sysfilter measurements.
vuln_paths_reg['libxml2.so.2@libxml2_2.12.7%2Bdfsg-3']['CVES_REACHED']

['CVE-2023-39615',
 'CVE-2024-25062',
 'CVE-2022-2309',
 'CVE-2022-40303',
 'CVE-2022-40304',
 'CVE-2022-29824',
 'CVE-2022-23308',
 'CVE-2023-28484',
 'CVE-2023-45322']

#### Number of exported functions that reach at least one CVE when using the over-approximated call graph from sysfilter

In [17]:
# Count the entries stored under 'VULN' for the libxml2 entry of the sysfilter measurements.
len(vuln_paths_reg['libxml2.so.2@libxml2_2.12.7%2Bdfsg-3']['VULN'])

915

#### Number of exported functions that reach at least one CVE when using the more precise LLVM call graph

In [22]:
# Count the entries stored under 'VULN' for the libxml2 entry of the LLVM measurements.
len(vuln_paths_llvm['libxml2.so.2@libxml2_2.12.7%2Bdfsg-3']['VULN'])

907

#### Number of indirect call sites identified by sysfilter and LLVM

In [24]:
# Load the indirect call-site counts used as the denominator of the LLVM AICT
# computation. The context manager closes the file handle once the JSON has
# been parsed.
with open("./data/indir_callsite_count.json", "r") as callsite_file:
    llvm_indir_callsite_num_dict = json.load(callsite_file)

In [24]:
# AQL query returning every document of the `functions` collection whose `lib`
# and `deb` fields equal the bound @lib_name / @deb_name values.
# Deliberately a plain string (not an f-string): the @... placeholders are
# filled in through bind variables when the query is executed, not by Python
# string formatting.
QUERY = "FOR f in functions FILTER f.lib == @lib_name && f.deb == @deb_name RETURN f"

### AICT Computation
When comparing Average Indirect Call Target (AICT) numbers between two call graphs generated by two different techniques (in this case sysfilter vs LLVM), the technique with a lower AICT is more precise in terms of indirect call target resolution.

In [25]:
# Compute the Average Indirect Call Target (AICT) value of every library for
# both call graphs. Both result dicts are keyed by the same identifiers as
# `vuln_paths_llvm`.
sysfilter_aict = {}
llvm_aict = {}
for lib in vuln_paths_llvm:
    # The text before the first "@" is used as the library name and the text
    # after the last "@" as the Debian package name in the query.
    key_parts = lib.split("@")
    lib_name = key_parts[0]
    deb_name = key_parts[-1]

    cursor = global_db.aql.execute(
        QUERY,
        bind_vars={'lib_name': lib_name, 'deb_name': deb_name},
    )
    all_funcs = list(cursor)

    # Functions flagged as implicit targets form the numerator for both graphs.
    at_funcs = [f for f in all_funcs if f['implicit_target']]
    # Functions flagged as implicit sources form the sysfilter denominator.
    indir_sources = [f for f in all_funcs if f['implicit_source']]
    sysfilter_aict[lib] = len(at_funcs) / len(indir_sources)

    # A library without an LLVM call-site count (missing entry or a count of
    # zero) simply gets no LLVM value. This keeps the previous best-effort
    # behaviour without a bare `except` that would also hide unrelated errors
    # such as KeyboardInterrupt.
    callsite_count = llvm_indir_callsite_num_dict.get(lib)
    if callsite_count:
        llvm_aict[lib] = len(at_funcs) / callsite_count

#### AICT for sysfilter

In [26]:
# Display the per-library AICT values held in `sysfilter_aict`.
sysfilter_aict

{'libexpat.so.1@libexpat1_2.6.2-1': 2.1176470588235294,
 'libXpm.so.4@libxpm4_1:3.5.17-1%2Bb1': 0.8235294117647058,
 'libtiff.so.6@libtiff6_4.5.1%2Bgit230720-4': 2.3214285714285716,
 'libcurl.so.4@libcurl4t64_8.8.0-2': 0.9473684210526315,
 'libr_core.so.5.9.2@libradare2-5.0.0t64_5.9.2%2Bdfsg-1': 4.056338028169014,
 'libgnutls.so.30@libgnutls30t64_3.8.5-4': 0.5393617021276595,
 'libxml2.so.2@libxml2_2.12.7%2Bdfsg-3': 0.46588235294117647,
 'libfreerdp3.so.3@libfreerdp3-3_3.5.1%2Bdfsg1-5': 2.6752577319587627,
 'libnetsnmpmibs.so.40@libsnmp40t64_5.9.4%2Bdfsg-1.1%2Bb1': 4.48695652173913,
 'libcrypto.so.3@libssl3t64_3.2.2-1': 5.172739541160594}

#### AICT for LLVM

In [27]:
# Display the per-library AICT values held in `llvm_aict`.
llvm_aict

{'libexpat.so.1@libexpat1_2.6.2-1': 0.47745358090185674,
 'libXpm.so.4@libxpm4_1:3.5.17-1%2Bb1': 0.358974358974359,
 'libtiff.so.6@libtiff6_4.5.1%2Bgit230720-4': 0.9969325153374233,
 'libcurl.so.4@libcurl4t64_8.8.0-2': 0.43902439024390244,
 'libr_core.so.5.9.2@libradare2-5.0.0t64_5.9.2%2Bdfsg-1': 3.891891891891892,
 'libgnutls.so.30@libgnutls30t64_3.8.5-4': 0.18715393133997785,
 'libxml2.so.2@libxml2_2.12.7%2Bdfsg-3': 0.14781634938409854,
 'libnetsnmpmibs.so.40@libsnmp40t64_5.9.4%2Bdfsg-1.1%2Bb1': 2.592964824120603,
 'libcrypto.so.3@libssl3t64_3.2.2-1': 2.55363091272485}

## Generating Numbers for Table 1

In [28]:
# Print one comma-separated row per library, in this column order:
#   library, len(TOTAL), len(EXPORTED),
#   sysfilter len(VULN), sysfilter len(CVES_REACHED), sysfilter AICT,
#   LLVM len(VULN), LLVM len(CVES_REACHED), LLVM AICT
# A library missing from any of the inputs (for example one that has no LLVM
# AICT value) is skipped. Only KeyError is caught so that unrelated failures
# are not silently swallowed.
for lib, lib_dict in vuln_paths_llvm.items():
    try:
        sysfilter_dict = vuln_paths_reg[lib]
        short_name = lib.split('@')[0]
        total_count = len(lib_dict['TOTAL'])
        exported_count = len(lib_dict['EXPORTED'])
        sysfilter_vuln_count = len(sysfilter_dict['VULN'])
        sysfilter_cve_count = len(sysfilter_dict['CVES_REACHED'])
        sysfilter_aict_rounded = round(sysfilter_aict[lib], 2)
        llvm_vuln_count = len(lib_dict['VULN'])
        llvm_cve_count = len(lib_dict['CVES_REACHED'])
        llvm_aict_rounded = round(llvm_aict[lib], 2)
    except KeyError:
        continue
    # The space before the last field is kept so the output stays identical
    # to the rows recorded in this notebook.
    print(
        f"{short_name},{total_count},{exported_count},"
        f"{sysfilter_vuln_count},{sysfilter_cve_count},{sysfilter_aict_rounded},"
        f"{llvm_vuln_count},{llvm_cve_count}, {llvm_aict_rounded}"
    )

libexpat.so.1,294,69,19,14,2.12,17,14, 0.48
libXpm.so.4,97,34,16,6,0.82,16,6, 0.36
libtiff.so.6,699,193,107,21,2.32,80,21, 1.0
libcurl.so.4,2811,88,69,28,0.95,68,28, 0.44
libr_core.so.5.9.2,2424,759,295,7,4.06,191,7, 3.89
libgnutls.so.30,2820,1312,931,6,0.54,910,6, 0.19
libxml2.so.2,2171,1398,915,9,0.47,907,9, 0.15
libnetsnmpmibs.so.40,2050,1717,135,6,4.49,132,6, 2.59
libcrypto.so.3,9422,4003,2675,25,5.17,2661,25, 2.55
