In [9]:
import os, sys
import json
import re
import Levenshtein
from tqdm import tqdm
from multiprocessing import Pool
import time

In [2]:
# Root folder of the VulLib data. NOTE(review): '{path_to_libgen}' looks like a
# placeholder that has to be replaced with a real local path before running.
input_folder = '{path_to_libgen}/libgen/VulLib'

# One path per dataset file, all located directly under input_folder.
train_path, valid_path, test_path, maven_path = (
    os.path.join(input_folder, file_name)
    for file_name in ('train.json', 'valid.json', 'test.json', 'maven_corpus_new.json')
)

In [3]:
# Read the train / valid / test JSON files in that order.
splits = []
for split_path in (train_path, valid_path, test_path):
    with open(split_path, 'r') as f:
        splits.append(json.load(f))
train, valid, test = splits

# Concatenation of the three splits (assumes each file holds a JSON array);
# the metric cells zip this list against `res`.
vulns = train + valid + test

In [4]:
# Distinct label names across all vulnerabilities, with the first six
# characters of each label dropped (presumably the 'maven:' prefix - confirm).
lib_names = {lib[6:] for vuln in vulns for lib in vuln['labels']}

In [4]:
# Load the Maven corpus and rebind lib_names to the set of its 'name' fields.
# Running this cell replaces any lib_names built from the dataset labels.
with open(maven_path, 'r') as f:
    maven_corpus = json.load(f)
lib_names = {entry['name'] for entry in maven_corpus}

In [5]:
# Index the library names by their last ':'-separated component:
# artifacts[<last component>] is the set of full names that end with it.
artifacts = {}
for item in lib_names:
    artifacts.setdefault(item.split(':')[-1], set()).add(item)

In [351]:
# Bind `res` to the responses stored in response/1_res_gpt-35-turbo.json.
# Several cells in this notebook load a different response file into `res`;
# later cells see whichever of them was executed last.
with open('response/1_res_gpt-35-turbo.json', 'r') as f:
    res = json.load(f)

In [268]:
# Bind `res` to the responses stored in response/20_res_gpt-35-turbo.json.
# Several cells in this notebook load a different response file into `res`;
# later cells see whichever of them was executed last.
with open('response/20_res_gpt-35-turbo.json', 'r') as f:
    res = json.load(f)

In [18]:
# Bind `res` to the responses stored in response/0_res_gpt-4-1106-preview.json.
# Several cells in this notebook load a different response file into `res`;
# later cells see whichever of them was executed last.
with open('response/0_res_gpt-4-1106-preview.json', 'r') as f:
    res = json.load(f)

In [16]:
# Bind `res` to the responses stored in response/1_res_gpt-4-1106-preview.json.
# Several cells in this notebook load a different response file into `res`;
# later cells see whichever of them was executed last.
with open('response/1_res_gpt-4-1106-preview.json', 'r') as f:
    res = json.load(f)

In [6]:
# Bind `res` to the responses stored in response/0_res_gpt-35-turbo.json.
# Several cells in this notebook load a different response file into `res`;
# later cells see whichever of them was executed last.
with open('response/0_res_gpt-35-turbo.json', 'r') as f:
    res = json.load(f)

In [7]:
# Pattern for 'maven:<part>:<part>' where each part is made of word
# characters, '-' and '.'. Written as a raw string: '\w' inside a plain
# literal is an invalid escape sequence that newer Python versions warn about.
# The compiled pattern is unchanged.
maven_regex = r'maven:[-.\w]+:[-.\w]+'

# Store every match found in the first entry of 'top_res' / 'rerank_res'
# on the response itself, under 'top_k' / 'rerank_k'.
for item in res:
    item['top_k'] = re.findall(maven_regex, item['top_res'][0])
    item['rerank_k'] = re.findall(maven_regex, item['rerank_res'][0])

In [15]:
# Pattern for 'maven:<part>:<part>' where each part is made of word
# characters, '-' and '.'. Raw string: '\w' inside a plain literal is an
# invalid escape sequence that newer Python versions warn about.
maven_regex = r'maven:[-.\w]+:[-.\w]+'
for item in res:
    item['top_k'] = re.findall(maven_regex, item['top_res'][0])
    item['rerank_k'] = re.findall(maven_regex, item['rerank_res'][0])

# Flatten all reranked predictions so they can be corrected in one pool pass.
target = [lib for vuln in res for lib in vuln['rerank_k']]

with Pool(processes=32) as pool:
    # imap yields results in input order. It has no length, so pass total=
    # explicitly to let tqdm show real progress instead of a bare counter.
    result = list(tqdm(pool.imap(closest_lib, target), total=len(target)))

# Walk `res` in the same order used to build `target` so that result[idx]
# lines up with the prediction it was computed from.
idx = 0
for vuln in res:
    vuln['rerank_k_post'] = []
    for lib in vuln['rerank_k']:
        vuln['rerank_k_post'].append('maven:'+result[idx])
        idx = idx + 1

# Scores of the raw reranked predictions. The filter only drops pairs for
# which precision/recall return None, i.e. vulnerabilities without labels.
k = 1
p = [precision(vuln, response['rerank_k'], k) for (vuln, response) in zip(vulns, res) if precision(vuln, response['top_k'], 1) is not None]
r = [recall(vuln, response['rerank_k'], k) for (vuln, response) in zip(vulns, res) if recall(vuln, response['top_k'], 1) is not None]
# NOTE(review): the printed precision averages only the first 205 entries,
# while recall and the F1 inputs use all entries - confirm this is intended.
print(sum(p[:205]) / len(p[:205]), sum(r) / len(r), f1_score(sum(p) / len(p), sum(r) / len(r)))

# Same scores for the post-processed predictions.
k = 1
p = [precision(vuln, response['rerank_k_post'], k) for (vuln, response) in zip(vulns, res) if precision(vuln, response['top_k'], 1) is not None]
r = [recall(vuln, response['rerank_k_post'], k) for (vuln, response) in zip(vulns, res) if recall(vuln, response['top_k'], 1) is not None]
print(sum(p[:205]) / len(p[:205]), sum(r) / len(r), f1_score(sum(p) / len(p), sum(r) / len(r)))

5448it [00:22, 240.51it/s]

0.4292682926829268 0.31743559416560524 0.3578981245102315
0.5365853658536586 0.3980324115946208 0.44483988203837055





In [348]:
# Pattern for 'maven:<part>:<part>' where each part is made of word
# characters, '-' and '.'. Raw string: '\w' inside a plain literal is an
# invalid escape sequence that newer Python versions warn about.
maven_regex = r'maven:[-.\w]+:[-.\w]+'
for item in res:
    item['top_k'] = re.findall(maven_regex, item['top_res'][0])
    item['rerank_k'] = re.findall(maven_regex, item['rerank_res'][0])

# Flatten all 'top_k' predictions so they can be corrected in one pool pass.
target = [lib for vuln in res for lib in vuln['top_k']]

with Pool(processes=32) as pool:
    # imap yields results in input order. It has no length, so pass total=
    # explicitly to let tqdm show real progress instead of a bare counter.
    result = list(tqdm(pool.imap(closest_lib, target), total=len(target)))

# Walk `res` in the same order used to build `target` so that result[idx]
# lines up with the prediction it was computed from.
idx = 0
for vuln in res:
    vuln['top_k_post'] = []
    for lib in vuln['top_k']:
        vuln['top_k_post'].append('maven:'+result[idx])
        idx = idx + 1

# Scores of the raw 'top_k' predictions. The filter only drops pairs for
# which precision/recall return None, i.e. vulnerabilities without labels.
k = 1
p = [precision(vuln, response['top_k'], k) for (vuln, response) in zip(vulns, res) if precision(vuln, response['top_k'], 1) is not None]
r = [recall(vuln, response['top_k'], k) for (vuln, response) in zip(vulns, res) if recall(vuln, response['top_k'], 1) is not None]
print(sum(p) / len(p), sum(r) / len(r), f1_score(sum(p) / len(p), sum(r) / len(r)))

# Same scores for the post-processed predictions.
k = 1
p = [precision(vuln, response['top_k_post'], k) for (vuln, response) in zip(vulns, res) if precision(vuln, response['top_k'], 1) is not None]
r = [recall(vuln, response['top_k_post'], k) for (vuln, response) in zip(vulns, res) if recall(vuln, response['top_k'], 1) is not None]
print(sum(p) / len(p), sum(r) / len(r), f1_score(sum(p) / len(p), sum(r) / len(r)))

8939it [00:00, 23891.91it/s]


0.5525277877375403 0.4294613089790577 0.483282976893477
0.637863033345285 0.4979036590277177 0.5592598205139719


In [11]:
# Edit-operation costs handed to Levenshtein.distance; per that library's
# documentation the order is (insertion, deletion, substitution).
weights = (1, 2, 2)

def cloest_artifact(artifact_id):
    """Return the key of the global `artifacts` mapping closest to artifact_id.

    An exact key match is returned unchanged. Otherwise every key is scored
    with the weighted Levenshtein distance and the smallest
    (distance, key) pair wins, so ties go to the lexicographically
    smallest key.
    """
    if artifact_id in artifacts:
        return artifact_id

    def ranking(candidate):
        cost = Levenshtein.distance(artifact_id, candidate, weights=weights)
        return (cost, candidate)

    return min(artifacts, key=ranking)

def cloest_group(group_id, groups):
    """Pick the entry of `groups` whose group part is closest to group_id.

    `groups` is a collection of ':'-separated names; the second-to-last
    component of each entry is compared against group_id with the weighted
    Levenshtein distance. Ties go to the lexicographically smallest entry.

    Returns group_id itself when `groups` is empty, and the sole entry when
    there is exactly one (no distance is computed in either case).
    """
    candidate_count = len(groups)
    if candidate_count == 0:
        return group_id
    if candidate_count == 1:
        (only_candidate,) = groups
        return only_candidate

    def ranking(candidate):
        cost = Levenshtein.distance(group_id, candidate.split(':')[-2], weights=weights)
        return (cost, candidate)

    return min(groups, key=ranking)


def closest_lib(label):
    """Map a predicted label onto a name derived from the known libraries.

    A label that is already in the global `lib_names` is returned as is.
    Otherwise the last ':'-separated component is treated as the artifact id
    and the one before it (or "" when there is none) as the group id. The
    artifact id is replaced by its closest key in `artifacts` when unknown,
    then cloest_group selects among the names registered for that artifact.

    NOTE(review): labels carrying a 'maven:' prefix will not hit the exact
    match if lib_names holds unprefixed names - confirm whether the prefix
    should be stripped first.
    """
    if label in lib_names:
        return label

    parts = label.split(':')
    artifact_id = parts[-1]
    group_id = parts[-2] if len(parts) > 1 else ""

    if artifact_id not in artifacts:
        artifact_id = cloest_artifact(artifact_id)
    return cloest_group(group_id, artifacts[artifact_id])

In [27]:
# Sanity check: number of distinct names currently held in lib_names.
len(lib_names)

311233

In [31]:
# Sanity check: lib_names stores names without the 'maven:' prefix.
'org.springframework:spring-core' in lib_names

True

In [189]:
# Flat list of every 'top_k' prediction, in response order; the write-back
# loop relies on this order to match results to predictions.
target = [
    prediction
    for response in res
    for prediction in response['top_k']
]

In [271]:
# Flat list of every 'rerank_k' prediction, in response order; the write-back
# loop relies on this order to match results to predictions.
target = [
    prediction
    for response in res
    for prediction in response['rerank_k']
]

In [138]:
# Flat list of the 'rerank_k' predictions of the last 633 responses only
# (presumably the test split, since vulns = train + valid + test - confirm).
target = [
    prediction
    for response in res[-633:]
    for prediction in response['rerank_k']
]

In [219]:
# Peek at the first flattened prediction to check its format.
target[0]

'maven:org.jgroups:jgroups'

In [272]:
# Resolve every entry of `target` with closest_lib across 32 worker processes.
# imap yields results in input order, so result[i] belongs to target[i].
# imap has no length, so total= is passed explicitly to let tqdm show real
# progress instead of a bare counter.
# NOTE(review): closest_lib reads notebook globals (lib_names, artifacts);
# presumably the workers must inherit them (fork start method) - confirm.
with Pool(processes=32) as pool:
    result = list(tqdm(pool.imap(closest_lib, target), total=len(target)))

11528it [00:00, 27525.60it/s]


In [12]:
def precision(vuln, pred, k):
    """Precision of the first k predictions against vuln['labels'].

    The denominator is min(k, number of labels), so a vulnerability with
    fewer than k labels can still reach 1.0. Returns None when the
    vulnerability has no labels.
    """
    gold = vuln['labels']
    gold_count = len(gold)
    if gold_count == 0:
        return None
    hits = set(pred[:k]).intersection(gold)
    return len(hits) / min(k, gold_count)

def recall(vuln, pred, k):
    """Share of vuln['labels'] found among the first k predictions.

    Returns None when the vulnerability has no labels.
    """
    gold = vuln['labels']
    gold_count = len(gold)
    if gold_count == 0:
        return None
    hits = set(pred[:k]).intersection(gold)
    return len(hits) / gold_count

def f1_score(p, r):
    """Harmonic mean of precision p and recall r.

    Returns 0.0 when p + r is zero (no correct prediction at all) instead of
    raising ZeroDivisionError.
    """
    if p + r == 0:
        return 0.0
    return 2*p*r/(p+r)

In [191]:
# Write the corrected names back as 'top_k_post', one per 'top_k' prediction.
# `result` must have been computed from the 'top_k' version of `target`:
# entries are matched purely by position.
idx = 0
for vuln in res:
    prediction_count = len(vuln['top_k'])
    vuln['top_k_post'] = ['maven:'+result[idx + offset] for offset in range(prediction_count)]
    idx = idx + prediction_count

In [273]:
# Write the corrected names back as 'rerank_k_post', one per 'rerank_k'
# prediction. `result` must have been computed from the 'rerank_k' version of
# `target`: entries are matched purely by position.
idx = 0
for vuln in res:
    prediction_count = len(vuln['rerank_k'])
    vuln['rerank_k_post'] = ['maven:'+result[idx + offset] for offset in range(prediction_count)]
    idx = idx + prediction_count

In [141]:
# Write the corrected names back as 'rerank_k_post' for the last 633
# responses only. `result` must have been computed from the matching
# res[-633:] version of `target`: entries are matched purely by position.
idx = 0
for vuln in res[-633:]:
    prediction_count = len(vuln['rerank_k'])
    vuln['rerank_k_post'] = ['maven:'+result[idx + offset] for offset in range(prediction_count)]
    idx = idx + prediction_count

In [274]:
# Precision / recall / F1 at k of the raw 'rerank_k' predictions.
# Assumes vulns and res are aligned by position. The filter only drops pairs
# whose score is None, i.e. vulnerabilities without labels.
k = 1
pairs = list(zip(vulns, res))
p = [
    precision(vuln, response['rerank_k'], k)
    for vuln, response in pairs
    if precision(vuln, response['top_k'], 1) is not None
]
r = [
    recall(vuln, response['rerank_k'], k)
    for vuln, response in pairs
    if recall(vuln, response['top_k'], 1) is not None
]
mean_p = sum(p) / len(p)
mean_r = sum(r) / len(r)
mean_p, mean_r, f1_score(mean_p, mean_r)

(0.7339548225170311, 0.5579836975767412, 0.6339850067983246)

In [275]:
# Precision / recall / F1 at k of the post-processed 'rerank_k_post'
# predictions. Assumes vulns and res are aligned by position. The filter only
# drops pairs whose score is None, i.e. vulnerabilities without labels.
k = 1
pairs = list(zip(vulns, res))
p = [
    precision(vuln, response['rerank_k_post'], k)
    for vuln, response in pairs
    if precision(vuln, response['top_k'], 1) is not None
]
r = [
    recall(vuln, response['rerank_k_post'], k)
    for vuln, response in pairs
    if recall(vuln, response['top_k'], 1) is not None
]
mean_p = sum(p) / len(p)
mean_r = sum(r) / len(r)
mean_p, mean_r, f1_score(mean_p, mean_r)

(0.7923987092147723, 0.6032001788366068, 0.684974812176334)

In [208]:
# Precision / recall / F1 at k of the raw 'rerank_k' predictions, for
# whichever response file is currently bound to `res`.
# Assumes vulns and res are aligned by position. The filter only drops pairs
# whose score is None, i.e. vulnerabilities without labels.
k = 1
pairs = list(zip(vulns, res))
p = [
    precision(vuln, response['rerank_k'], k)
    for vuln, response in pairs
    if precision(vuln, response['top_k'], 1) is not None
]
r = [
    recall(vuln, response['rerank_k'], k)
    for vuln, response in pairs
    if recall(vuln, response['top_k'], 1) is not None
]
mean_p = sum(p) / len(p)
mean_r = sum(r) / len(r)
mean_p, mean_r, f1_score(mean_p, mean_r)

(0.7303693079956974, 0.553230329754173, 0.6295770756263251)

In [210]:
# Precision / recall / F1 at k of the post-processed 'rerank_k_post'
# predictions. Here the None filter is evaluated on 'rerank_k' rather than
# 'top_k'; either way it only drops vulnerabilities without labels.
# Assumes vulns and res are aligned by position.
k = 1
pairs = list(zip(vulns, res))
p = [
    precision(vuln, response['rerank_k_post'], k)
    for vuln, response in pairs
    if precision(vuln, response['rerank_k'], 1) is not None
]
r = [
    recall(vuln, response['rerank_k_post'], k)
    for vuln, response in pairs
    if recall(vuln, response['rerank_k'], 1) is not None
]
mean_p = sum(p) / len(p)
mean_r = sum(r) / len(r)
mean_p, mean_r, f1_score(mean_p, mean_r)

(0.782717820007171, 0.5955276046079192, 0.6764108338150029)

In [211]:
# Precision / recall / F1 at k of 'rerank_k_post', restricted to responses
# that produced at least one 'rerank_k' prediction and to vulnerabilities
# that have labels. Assumes vulns and res are aligned by position.
k = 1
pairs = list(zip(vulns, res))
p = [
    precision(vuln, response['rerank_k_post'], k)
    for vuln, response in pairs
    if len(response['rerank_k']) > 0 and precision(vuln, response['rerank_k'], 1) is not None
]
r = [
    recall(vuln, response['rerank_k_post'], k)
    for vuln, response in pairs
    if len(response['rerank_k']) > 0 and recall(vuln, response['rerank_k'], 1) is not None
]
mean_p = sum(p) / len(p)
mean_r = sum(r) / len(r)
mean_p, mean_r, f1_score(mean_p, mean_r)

(0.7915155910079767, 0.6022213521578994, 0.6840137112074123)

In [181]:
# Precision / recall / F1 at k of the raw 'rerank_k' predictions over the last
# 633 entries only (presumably the test split, since vulns = train + valid +
# test - confirm). The filter only drops vulnerabilities without labels.
k = 1
pairs = list(zip(vulns[-633:], res[-633:]))
p = [
    precision(vuln, response['rerank_k'], k)
    for vuln, response in pairs
    if precision(vuln, response['top_k'], k) is not None
]
r = [
    recall(vuln, response['rerank_k'], k)
    for vuln, response in pairs
    if recall(vuln, response['top_k'], k) is not None
]
mean_p = sum(p) / len(p)
mean_r = sum(r) / len(r)
mean_p, mean_r, f1_score(mean_p, mean_r)

(0.6530973451327433, 0.5030538635405892, 0.5683394010617813)

In [180]:
# Precision / recall / F1 at k of the post-processed 'rerank_k_post'
# predictions over the last 633 entries only (presumably the test split,
# since vulns = train + valid + test - confirm). The filter only drops
# vulnerabilities without labels.
k = 1
pairs = list(zip(vulns[-633:], res[-633:]))
p = [
    precision(vuln, response['rerank_k_post'], k)
    for vuln, response in pairs
    if precision(vuln, response['top_k'], 1) is not None
]
r = [
    recall(vuln, response['rerank_k_post'], k)
    for vuln, response in pairs
    if recall(vuln, response['top_k'], 1) is not None
]
mean_p = sum(p) / len(p)
mean_r = sum(r) / len(r)
mean_p, mean_r, f1_score(mean_p, mean_r)

(0.6637168141592921, 0.512493391564188, 0.5783838287943589)

In [192]:
# Precision / recall / F1 at k of the raw 'top_k' predictions.
# Assumes vulns and res are aligned by position. The filter only drops pairs
# whose score is None, i.e. vulnerabilities without labels.
k = 1
pairs = list(zip(vulns, res))
p = [
    precision(vuln, response['top_k'], k)
    for vuln, response in pairs
    if precision(vuln, response['top_k'], 1) is not None
]
r = [
    recall(vuln, response['top_k'], k)
    for vuln, response in pairs
    if recall(vuln, response['top_k'], 1) is not None
]
mean_p = sum(p) / len(p)
mean_r = sum(r) / len(r)
mean_p, mean_r, f1_score(mean_p, mean_r)

(0.41054141269272143, 0.31759495036655344, 0.35813588278915653)

In [193]:
# Precision / recall / F1 at k of the post-processed 'top_k_post' predictions.
# Assumes vulns and res are aligned by position. The filter only drops pairs
# whose score is None, i.e. vulnerabilities without labels.
k = 1
pairs = list(zip(vulns, res))
p = [
    precision(vuln, response['top_k_post'], k)
    for vuln, response in pairs
    if precision(vuln, response['top_k'], 1) is not None
]
r = [
    recall(vuln, response['top_k_post'], k)
    for vuln, response in pairs
    if recall(vuln, response['top_k'], 1) is not None
]
mean_p = sum(p) / len(p)
mean_r = sum(r) / len(r)
mean_p, mean_r, f1_score(mean_p, mean_r)

(0.505198996055934, 0.39890887069983577, 0.44580601144089604)

In [None]:
# Inspect the first response record, including any keys added by earlier cells.
res[0]