In [5]:
whitelist = pd.read_csv("../data/whitelist1.csv")
whitelist

Unnamed: 0,cpeName,Title,part,vendor,product,version
0,cpe:2.3:a:adobe:acrobat:-:*:*:*:*:*:*:*,Adobe Acrobat,a,adobe,acrobat,
1,cpe:2.3:a:adobe:acrobat:-:*:*:*:*:android:*:*,Adobe Acrobat for Android,a,adobe,acrobat,
2,cpe:2.3:a:adobe:acrobat:1.0:*:*:*:suite:*:*:*,Adobe Acrobat Suite 1.0,a,adobe,acrobat,1
3,cpe:2.3:a:adobe:acrobat:10.0.1:*:*:*:*:*:*:*,Adobe Acrobat X (10.0.1),a,adobe,acrobat,10.0.1
4,cpe:2.3:a:adobe:acrobat:10.0.1:-:pro:*:*:*:*:*,Adobe Acrobat 10.0.1 Professional,a,adobe,acrobat,10.0.1
...,...,...,...,...,...,...
995,cpe:2.3:a:alfresco:alfresco:1.6.1:*:*:*:*:andr...,Alfresco 1.6.1 for Android,a,alfresco,alfresco,1.6.1
996,cpe:2.3:a:alfresco:alfresco:1.7.0:*:*:*:*:andr...,Alfresco 1.7.0 for Android,a,alfresco,alfresco,1.7.0
997,cpe:2.3:a:alfresco:alfresco:1.8.0:*:*:*:*:andr...,Alfresco 1.8.0 for Android,a,alfresco,alfresco,1.8.0
998,cpe:2.3:a:alfresco:alfresco:1.8.1:*:*:*:*:andr...,Alfresco 1.8.1 for Android,a,alfresco,alfresco,1.8.1


In [2]:
import requests
import pandas as pd
import time

API_URL = "https://services.nvd.nist.gov/rest/json/cves/2.0"
API_KEY = "ea5501a5-24fe-4720-80e3-2abed401d92f"

def fetch_cves_for_cpe(cpe_name, start_date, end_date, results_per_page=200):
    headers = {"apiKey": API_KEY}
    params = {
        "resultsPerPage": results_per_page,
        "startIndex": 0,
        "pubStartDate": f"{start_date}T00:00:00.000Z",
        "pubEndDate": f"{end_date}T23:59:59.999Z",
        "cpeName": cpe_name,
    }
    all_vulnerabilities = []
    while True:
        response = requests.get(API_URL, headers=headers, params=params)
        if response.status_code == 200:
            data = response.json()
            vulnerabilities = data.get("vulnerabilities", [])
            all_vulnerabilities.extend(vulnerabilities)
            total_results = data.get("totalResults", 0)
            print(f"CPE {cpe_name}: Fetched {len(vulnerabilities)} records. Total expected: {total_results}")
            params["startIndex"] += results_per_page
            if params["startIndex"] >= total_results:
                break
            time.sleep(6)  # NVD API rate limiting
        elif response.status_code == 403:
            print("Error: Invalid API Key or permissions.")
            break
        else:
            print(f"Error: {response.status_code} - {response.text}")
            break
    return all_vulnerabilities

# MAIN FLOW
whitelist = pd.read_csv("../data/whitelist1.csv")["cpeName"].dropna().tolist()
start_date = "2023-01-01"
end_date = "2024-12-31"

all_vulnerabilities = []
for cpe_name in whitelist:
    vulns = fetch_cves_for_cpe(cpe_name, start_date, end_date)
    all_vulnerabilities.extend(vulns)

print(f"Total unique vulnerabilities fetched: {len(all_vulnerabilities)}")

# Filtering and DataFrame construction
filtered_data = []
for vuln in all_vulnerabilities:
    cve_id = vuln["cve"]["id"]
    descriptions = vuln["cve"].get("descriptions", [])
    description = next((desc["value"] for desc in descriptions if desc.get("lang") == "en"), "N/A")
    metrics = vuln["cve"].get("metrics", {})
    cvss_score, attack_vector = "N/A", "N/A"
    if "cvssMetricV31" in metrics:
        cvss_data = metrics["cvssMetricV31"][0].get("cvssData", {})
        cvss_score = cvss_data.get("baseScore", "N/A")
        attack_vector = cvss_data.get("vectorString", "N/A")
    elif "cvssMetricV30" in metrics:
        cvss_data = metrics["cvssMetricV30"][0].get("cvssData", {})
        cvss_score = cvss_data.get("baseScore", "N/A")
        attack_vector = cvss_data.get("vectorString", "N/A")
    elif "cvssMetricV2" in metrics:
        cvss_data = metrics["cvssMetricV2"][0].get("cvssData", {})
        cvss_score = cvss_data.get("baseScore", "N/A")
        attack_vector = cvss_data.get("vectorString", "N/A")
    # OS extraction from CPEs
    os_list = set()
    configurations = vuln["cve"].get("configurations", [])
    for config in configurations:
        for node in config.get("nodes", []):
            for cpe_match in node.get("cpeMatch", []):
                cpe_uri = cpe_match.get("criteria", "")
                if cpe_uri.startswith("cpe:2.3:"):
                    parts = cpe_uri.split(":")
                    if len(parts) >= 5 and parts[2] == 'o':
                        vendor = parts[3].replace("_", " ").title()
                        product = parts[4].replace("_", " ").title()
                        version = parts[5] if len(parts) > 5 else ""
                        if version not in ["*", "-"]:
                            os_list.add(f"{vendor} {product} {version}".strip())
                        else:
                            os_list.add(f"{vendor} {product}".strip())
    os_info = ", ".join(os_list) if os_list else "N/A"
    if cvss_score != "N/A" and attack_vector != "N/A":
        filtered_data.append({
            "CVE ID": cve_id,
            "Description": description,
            "CVSS Score": cvss_score,
            "Attack Vector": attack_vector,
            "Affected OS": os_info
        })
df = pd.DataFrame(filtered_data)
if not df.empty:
    df.to_csv("nvd_vulnerabilities_with_os.csv", index=False)
    print(f"Saved {len(df)} records with OS information to CSV.")
else:
    print("No records with both CVSS Score and Attack Vector found.")

Error: 404 - 
Error: 404 - 
Error: 404 - 
Error: 404 - 
Error: 404 - 
Error: 404 - 
Error: 404 - 
Error: 404 - 
Error: 404 - 
Error: 404 - 
Error: 404 - 
Error: 404 - 
Error: 404 - 
Error: 404 - 
Error: 404 - 
Error: 404 - 
Error: 404 - 


KeyboardInterrupt: 

In [None]:
(cpe_uri: str) -> list[dict]