/
nvdtoolkit.py
45 lines (34 loc) · 1.33 KB
/
nvdtoolkit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
"""Identifier based on nvd-toolkit."""
from toolkit import pipelines
from toolkit.transformers.classifiers import NBClassifier
from toolkit.transformers import feature_hooks
from cvejob.identifiers.naive import NaivePackageNameIdentifier
from cvejob.config import Config
from cvejob.cpe2pkg import run_cpe2pkg
class NvdToolkitPackageNameIdentifier(NaivePackageNameIdentifier):
    """Package name identifier backed by an nvd-toolkit prediction pipeline."""

    def identify(self):
        """Predict package name candidates and resolve them with cpe2pkg.

        A classifier is restored from ``Config.nvdtoolkit_export_dir`` and,
        together with a fixed list of feature hooks, wrapped in a prediction
        pipeline that is run over ``self.doc.description``.

        For the 'java' ecosystem the predicted candidates are used as both
        the vendor and the product list; for any other ecosystem the vendor
        list contains only the ecosystem name.

        :return: whatever ``run_cpe2pkg(vendor, product)`` returns
        """
        # Load the classifier from the configured checkpoint directory.
        classifier = NBClassifier.restore(
            checkpoint=Config.nvdtoolkit_export_dir
        )

        prediction_pipeline = pipelines.get_prediction_pipeline(
            classifier=classifier,
            feature_hooks=[
                feature_hooks.has_uppercase_hook,
                feature_hooks.is_alnum_hook,
                feature_hooks.ver_follows_hook,
                feature_hooks.word_len_hook,
            ],
        )

        # A single description goes in, so only the first row of the
        # prediction output is used.
        predictions = prediction_pipeline.fit_predict(
            [self.doc.description], classifier__sample=True
        ).tolist()
        first_row = predictions[0]

        # NOTE(review): each entry is presumably a nested (name, score)-like
        # structure whose innermost first element is the candidate name --
        # confirm against nvd-toolkit's prediction output format.
        names = [entry[0][0] for entry in first_row]

        eco = Config.ecosystem
        vendors = names if eco == 'java' else [eco]
        products = names

        return run_cpe2pkg(vendors, products)