In [None]:
!pip3 install mitrecve==1.1.1

In [1]:
import requests
from mitrecve import crawler
from pprint import pprint

In [2]:
def mitre_cve_api(package):
    cve_simple = crawler.get_main_page(package) 
    return crawler.get_cve_detail(cve_simple)


In [3]:
entropy_node_features = [
    "Readline/readline-i.ri", "libxml/xmlstring.h", "ClassMethods/commands-i.ri", "bundler/plugin",
    "Color/set_color-i.ri", "HiddenCommand/cdesc-HiddenCommand.ri", "Thor/Base", "templates/newgem",
    "Actions/inject_into_class-i.ri", "source/git"
]

In [4]:
heterogat_node_features = [
    "tmp/pip-ephem-wheel-cache-e2vc_lpv",
    "simple/fakerv2",
    "fakerv2_1.0",
    "python3bin/analyze-python.py--version1.0installfakerv2",
    "tmp/1cggeydu",
    "Aix/FfiHelper",
    "FfiHelper/address_to_string-c.ri",
    "FfiHelper/log-c.ri",
    "FfiHelper/read_interfaces-c.ri",
    "FfiHelper/read_load_averages-c.ri"
]

In [5]:
import concurrent.futures
import threading
import re

def extract_clean_package_name(raw_name: str) -> str:
    """
    Extracts package name from a raw feature string by removing version-like suffixes.

    Examples:
        'fakerv2_1.0'         → 'faker'
        'requests-v2.31.0'    → 'requests'
        'torch_1.13.0'        → 'torch'
        'flaskv1'             → 'flask'
        'html5lib-0.99999999' → 'html5lib'
    """
    # Keep only the base name (before / if present)
    name = raw_name.split('/')[0]

    # Step 1: Remove common version patterns
    # Patterns like: -v1.2.3, _1.2, v1.2, -v1, _v2, etc.
    name = re.sub(r'[-_]?v?\d+(\.\d+)*$', '', name)

    # Optional: remove trailing non-alpha characters
    name = re.sub(r'[^a-zA-Z]+$', '', name)

    return name.lower()

def count_cves_for_features(entropy_node_features, timeout=300, max_workers=10, max_retries=2):
    from collections import defaultdict

    total_cve_count = 0
    package_cve_count_map = {}
    seen = set()

    # Collect all unique items (package/module names)
    for node in entropy_node_features:
        parts = node.split('/')
        items = parts[:2] if len(parts) > 1 else parts
        # extract suitable names
        items = [extract_clean_package_name(item) for item in items]
        seen.update(items)

    def fetch_cves(item):
        for attempt in range(max_retries):
            try:
                print(f"Fetching CVEs for {item}... (Attempt {attempt+1})")

                # Add timeout using thread-safe container
                result = {}
                def target():
                    try:
                        result['data'] = mitre_cve_api(item)
                    except Exception as e:
                        result['error'] = e

                thread = threading.Thread(target=target)
                thread.start()
                thread.join(timeout)

                if thread.is_alive():
                    raise TimeoutError(f"Timeout when fetching {item}")
                if 'error' in result:
                    raise result['error']

                cve_dict = result['data']
                cve_count = len(cve_dict)
                print(f"→ {item}: {cve_count} CVEs found.\n")
                return item, cve_count

            except Exception as e:
                print(f"⚠️  Error fetching CVEs for {item}: {e}")
                if attempt == max_retries - 1:
                    return item, 0
            finally:
                print("-" * 80)

    # Use ThreadPoolExecutor to parallelize fetching
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {executor.submit(fetch_cves, item): item for item in seen}
        for future in concurrent.futures.as_completed(futures):
            item, count = future.result()
            package_cve_count_map[item] = count
            total_cve_count += count

    print(f"\n✅ Total matched CVEs for this feature set: {total_cve_count}")
    return total_cve_count, package_cve_count_map


In [6]:
# match cve number for entropy node
entropy_total_cves, entropy_cve_map = count_cves_for_features(entropy_node_features)

Fetching CVEs for templates... (Attempt 1)
Fetching CVEs for source... (Attempt 1)
Fetching CVEs for color... (Attempt 1)
Fetching CVEs for newgem... (Attempt 1)
Fetching CVEs for commands-i.ri... (Attempt 1)
Fetching CVEs for bundler... (Attempt 1)
Fetching CVEs for git... (Attempt 1)
Fetching CVEs for hiddencommand... (Attempt 1)
Fetching CVEs for inject_into_class-i.ri... (Attempt 1)
Fetching CVEs for readline... (Attempt 1)
→ inject_into_class-i.ri: 0 CVEs found.

--------------------------------------------------------------------------------
Fetching CVEs for thor... (Attempt 1)
→ commands-i.ri: 0 CVEs found.

--------------------------------------------------------------------------------
Fetching CVEs for readline-i.ri... (Attempt 1)
→ hiddencommand: 0 CVEs found.

--------------------------------------------------------------------------------
Fetching CVEs for cdesc-hiddencommand.ri... (Attempt 1)
→ newgem: 0 CVEs found.

------------------------------------------------------

In [7]:
print("\nEntropy Node Features CVE Count:", entropy_total_cves)


Entropy Node Features CVE Count: 87


In [8]:
# match cve number for heterogat node
hererogat_total_cves, heterogat_cve_map = count_cves_for_features(heterogat_node_features)

Fetching CVEs for pip-ephem-wheel-cache-e2vc_lpv... (Attempt 1)
Fetching CVEs for ffihelper... (Attempt 1)
Fetching CVEs for log-c.ri... (Attempt 1)
Fetching CVEs for python3bin... (Attempt 1)
Fetching CVEs for aix... (Attempt 1)
Fetching CVEs for fakerv... (Attempt 1)
Fetching CVEs for 1cggeydu... (Attempt 1)
Fetching CVEs for analyze-python.py--version1.0installfaker... (Attempt 1)
Fetching CVEs for tmp... (Attempt 1)
Fetching CVEs for faker... (Attempt 1)
→ ffihelper: 0 CVEs found.

--------------------------------------------------------------------------------
Fetching CVEs for simple... (Attempt 1)
→ python3bin: 0 CVEs found.

--------------------------------------------------------------------------------
Fetching CVEs for read_interfaces-c.ri... (Attempt 1)
→ log-c.ri: 0 CVEs found.

--------------------------------------------------------------------------------
Fetching CVEs for read_load_averages-c.ri... (Attempt 1)
→ pip-ephem-wheel-cache-e2vc_lpv: 0 CVEs found.

----------

In [9]:
print("\nHeterogat Node Features CVE Count:", hererogat_total_cves)


Heterogat Node Features CVE Count: 1
