### Table 3: Detection Performance Comparison of Three Detectors on 80 Web Pages

The related experiment data are organized as follows.

`ranks.SEMrushRanks-us.csv` @ Downloaded from the SEMrush US website traffic ranking on Feb 23, 2023.

`detection_result.csv` @ The detection results obtained by applying LDC, Wappalyzer, and PTdetector to the 80 top-traffic web pages.
- `domain`: The domain name
- `rank`: The domain rank
- `url`: The home page URL under the domain
- `ground truth`: The ground-truth libraries loaded on the web page
- `LDC`: The libraries identified by LDC
- `Wappalyzer`: The libraries identified by Wappalyzer
- `PTdetector`: The libraries identified by PTdetector
- `comment`: Optional comment

We define a class named `ConfMatrix` that provides basic helper functions for confusion-matrix-related calculations.

In [30]:
class ConfMatrix():
    """Confusion-matrix helpers for comparing detected libraries to ground truth.

    Library entries handled by this class are dicts with the keys ``name``,
    ``version``, ``score`` and ``depth``, as built by ``convertToObject``.
    """

    def __init__(self, black_list=None):
        """Set up the helper.

        Args:
            black_list: Library names that ``convertToObject`` drops and whose
                count is subtracted from ``TOTAL_LIB_NUM`` when true negatives
                are derived. Defaults to a new empty list.
        """
        # A literal ``[]`` default would be one list shared by every instance,
        # so mutating one instance's black list would leak into the others.
        self.BLACK_LIST = [] if black_list is None else black_list
        # Size of the library universe used to derive true negatives.
        self.TOTAL_LIB_NUM = 83

    def confusionMatrix(self, truelist, list):
        """Count confusion-matrix cells for one page.

        Args:
            truelist: Ground-truth library entries (dicts with a ``name`` key).
            list: Detected library entries (dicts with a ``name`` key).

        Returns:
            Tuple ``(tp, fn, fp, tn)``. ``tn`` is what remains of
            ``TOTAL_LIB_NUM`` after removing the black-listed libraries and
            the other three cells.
        """
        # Build the lookup once instead of once per detected entry.
        true_names = {entry['name'] for entry in truelist}

        tp = 0
        fp = 0
        for entry in list:
            if entry['name'] in true_names:
                tp += 1
            else:
                fp += 1

        fn = len(truelist) - tp
        tn = self.TOTAL_LIB_NUM - len(self.BLACK_LIST) - tp - fn - fp

        return tp, fn, fp, tn

    def calValue(self, TP, FN, FP, TN):
        """Compute accuracy, precision and recall as percentages.

        Args:
            TP: True-positive count.
            FN: False-negative count.
            FP: False-positive count.
            TN: True-negative count.

        Returns:
            Tuple ``(accuracy, precision, recall)``, each rounded to two
            decimals. A metric whose denominator is zero is reported as
            ``0.0`` instead of raising ``ZeroDivisionError``.
        """
        def percentage(numerator, denominator):
            # E.g. a detector that reports nothing has TP + FP == 0.
            if denominator == 0:
                return 0.0
            return round(numerator * 100 / denominator, 2)

        Accu = percentage(TP + TN, TP + FP + FN + TN)
        Pre = percentage(TP, TP + FP)
        Recall = percentage(TP, TP + FN)

        return Accu, Pre, Recall

    def filter(self, list, threshold=50):
        """Keep entries that have no score or a score of at least ``threshold``.

        Args:
            list: Library entries (dicts with a ``score`` key).
            threshold: Minimum score an entry with a score must reach.

        Returns:
            A new list with the retained entries in their original order.
        """
        return [
            lib_obj
            for lib_obj in list
            if lib_obj["score"] is None or lib_obj["score"] >= threshold
        ]

    def convertToObject(self, str_list):
        """Parse encoded library strings into entry dicts.

        Each string has the form ``<libname> @ <version> : <score> ! <depth>``
        where every part after the name is optional. The parts are peeled off
        from the right: depth (after the first ``!``), then score (after the
        first ``:``), then version (after the first ``@``).

        Args:
            str_list: Iterable of encoded library strings.

        Returns:
            List of dicts with keys ``depth`` (int or None), ``score`` (float
            or None), ``version`` (str or None) and ``name`` (str). Entries
            whose name is in ``BLACK_LIST`` are omitted.

        Raises:
            ValueError: If a depth or score part is not numeric.
        """
        obj_list = []
        for encoded in str_list:
            obj = {}

            # ``partition`` splits on the first separator, like ``find`` did.
            head, sep, tail = encoded.partition('!')
            obj['depth'] = int(tail) if sep else None
            encoded = head

            head, sep, tail = encoded.partition(':')
            obj['score'] = float(tail) if sep else None
            encoded = head

            head, sep, tail = encoded.partition('@')
            obj['version'] = tail if sep else None
            encoded = head

            obj['name'] = encoded

            if encoded not in self.BLACK_LIST:
                obj_list.append(obj)

        return obj_list

The following code calculates the accuracy, precision, and recall of the three detectors.

In [31]:
import json
import pandas as pd

# Shared helper instance, created with its default (empty) black list.
CM = ConfMatrix()

# The detection results are stored as a semicolon-delimited CSV file.
df = pd.read_csv(
    "./detection_result.csv",
    delimiter=';',
)

# Ground-truth cells, one per row, in the same order as the tool columns.
gt_column = df['ground truth'].tolist()

def analyze(tool_name, threshold):
    """Print accuracy, precision and recall of one detector over all rows.

    The confusion-matrix cells of every row are summed first and the metrics
    are computed once from the totals.

    Args:
        tool_name: Name of the ``df`` column holding the detector's results.
        threshold: Minimum score passed to ``CM.filter``; ``None`` skips the
            score filtering.
    """
    TP, FN, FP, TN = 0, 0, 0, 0
    tool_column = df[tool_name].tolist()

    # Both columns come from the same data frame, so they pair up row by row.
    for gt_cell, tool_cell in zip(gt_column, tool_column):
        # Decode library result string
        # (each cell is presumably a JSON array of library strings -- confirm
        # against detection_result.csv)
        expected = CM.convertToObject(json.loads(gt_cell))
        detected = CM.convertToObject(json.loads(tool_cell))

        # Filter based on score threshold
        if threshold is not None:
            detected = CM.filter(detected, threshold)

        tp, fn, fp, tn = CM.confusionMatrix(expected, detected)
        TP += tp
        FN += fn
        FP += fp
        TN += tn

    # Calculate the accuracy, precision, and recall
    a, p, r = CM.calValue(TP, FN, FP, TN)

    # Short names get an extra tab so the printed columns stay aligned.
    label = tool_name
    if len(label) < 5:
        label += '\t'
    print(f"{label}\t{a}%\t\t{p}%\t\t{r}%")

if __name__ == "__main__":
    # (detector column, score threshold) pairs, in the order they are printed.
    # PTdetector is evaluated at three different score thresholds.
    runs = (
        ('LDC', None),
        ('Wappalyzer', None),
        ('PTdetector', 57),
        ('PTdetector', 70),
        ('PTdetector', 77),
    )

    print("\t\tAccuracy\tPrecision\tRecall")
    for detector, min_score in runs:
        analyze(detector, min_score)


		Accuracy	Precision	Recall
LDC		99.1%		96.59%		83.33%
Wappalyzer	98.63%		94.61%		74.51%
PTdetector	99.41%		96.84%		90.2%
PTdetector	99.43%		100.0%		87.58%
PTdetector	99.25%		100.0%		83.66%
