In [1]:
import requests
import csv
import time
import logging


In [2]:
# Set up the root logger: emit everything from DEBUG upward through a single
# stream handler, with each record rendered as "timestamp - LEVEL - message".
# NOTE(review): at DEBUG level the recorded output of this notebook includes
# full HTTP request headers, so review outputs before sharing.
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)


In [3]:
# Enable http.client's wire-level debug trace. debuglevel is set on the
# HTTPConnection class itself, so it applies to every connection that does not
# override it on the instance.
# NOTE(review): the recorded output of this notebook shows the trace includes
# the raw request headers (the apiKey header among them) -- keep this disabled
# or scrub outputs before sharing.
import http.client as http_client
http_client.HTTPConnection.debuglevel = 1


In [4]:
# Define a function to capture HTTP client debug output
def http_client_debug(*args, **kwargs):
    """Log print-style positional arguments as one DEBUG record.

    Intended as a drop-in replacement for ``print``: every positional
    argument is converted with ``str()`` (so non-string values such as ints
    or bytes do not raise ``TypeError``) and the pieces are joined with
    ``sep`` (a single space by default), mirroring ``print``.

    Args:
        *args: Values to log, in order.
        **kwargs: ``print``-style keyword arguments. Only ``sep`` is honoured;
            ``end``, ``file`` and ``flush`` are accepted and ignored because
            the destination is decided by the logging configuration.
    """
    sep = kwargs.get('sep')
    if sep is None:
        # print() treats sep=None the same as the default single space
        sep = ' '
    logging.debug(sep.join(str(arg) for arg in args))

# Patch the print function in the http.client module to use the custom function.
# Assigning a module-level name "print" on http.client shadows the builtin for
# code inside that module only, so its debug print() calls go to
# http_client_debug (and from there to logging) instead of stdout.
http_client.print = http_client_debug


In [6]:
import csv
import logging
import os
import time

import requests

# Configure logging at INFO level with "timestamp - LEVEL - message" records.
# NOTE: logging.basicConfig() does nothing when the root logger already has
# handlers, so if an earlier cell has already configured logging in this
# kernel, this call is ignored and the earlier level stays in effect (the
# recorded output of this cell still contains DEBUG records).
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Define the API endpoint and parameters
url = 'https://services.nvd.nist.gov/rest/json/cves/2.0'

# The API key is a credential and must not be stored in the notebook. It is
# read from the NVD_API_KEY environment variable; when that variable is unset
# or blank the apiKey header is omitted and requests are sent without a key.
api_key = os.environ.get('NVD_API_KEY', '').strip()
headers = {'apiKey': api_key} if api_key else {}
if not api_key:
    logging.warning("NVD_API_KEY is not set; sending requests without an API key.")

batch_size = 100  # Number of records per request
total_records = 2000  # Total number of records desired

# Initialize a list to hold all CVE items
all_cves = []

# Calculate the number of batches needed.
# Floor division: if total_records is not a multiple of batch_size, the
# remainder is not fetched.
num_batches = total_records // batch_size

# Loop to fetch multiple pages.
# Each iteration requests one page of `batch_size` records starting at
# `start_index` and appends the returned items to `all_cves`. The loop stops
# early on any transport error, non-200 status, unparsable body, or when the
# API reports that no further records exist.
request_timeout = 30  # seconds; without a timeout requests.get() can block indefinitely

for batch in range(num_batches):
    start_index = batch * batch_size
    params = {'startIndex': start_index, 'resultsPerPage': batch_size}
    logging.info(f"Fetching data with startIndex={start_index}")

    # Make the API request; connection errors and timeouts are reported
    # instead of aborting the cell with a traceback.
    try:
        response = requests.get(url, headers=headers, params=params, timeout=request_timeout)
    except requests.RequestException as exc:
        logging.error(f"Failed to retrieve data: {exc}")
        break

    # Check if the request was successful
    if response.status_code != 200:
        logging.error(f"Failed to retrieve data: {response.status_code}")
        break

    logging.info(f"Successfully retrieved data for startIndex={start_index}")

    # Parse the JSON content; a 200 response with a non-JSON body raises a
    # ValueError subclass.
    try:
        data = response.json()
    except ValueError as exc:
        logging.error(f"Failed to parse response for startIndex={start_index}: {exc}")
        break

    # Extract the list of CVE items and add the current batch to the list
    cve_items = data.get('vulnerabilities', [])
    all_cves.extend(cve_items)

    # Stop once the API has nothing more to return, so no further (empty)
    # pages are requested.
    total_available = data.get('totalResults')
    reached_end = (
        isinstance(total_available, int)
        and start_index + len(cve_items) >= total_available
    )
    if not cve_items or reached_end:
        logging.info("No more records available; stopping early.")
        break

    # Respect NVD API rate limiting -- but only when another request follows.
    if batch < num_batches - 1:
        logging.info("Sleeping for 6 seconds to respect rate limiting.")
        time.sleep(6)  # NVD recommends sleeping for 6 seconds between requests

# Output file for the flattened CVE table
csv_filename = 'nvd_cves.csv'

# Column names, in output order
csv_headers = ['cve_id', 'description', 'published_date', 'last_modified_date', 'base_score', 'base_severity']

# CVSS metric lists to try, in order. V2 stays first so that records which
# already produced a score keep the same values; the newer versions are only
# used as a fallback for CVEs that have no V2 entry.
_CVSS_METRIC_KEYS = ('cvssMetricV2', 'cvssMetricV31', 'cvssMetricV30', 'cvssMetricV40')


def _english_description(descriptions):
    """Return the first description whose lang is 'en', or 'N/A' if none."""
    for desc in descriptions:
        # .get() so an entry missing 'lang' or 'value' cannot raise KeyError
        if desc.get('lang') == 'en':
            return desc.get('value', 'N/A')
    return 'N/A'


def _cvss_base_metrics(metrics):
    """Return (base_score, base_severity) from the first available CVSS list.

    Both values are 'N/A' when no supported metric list is present.
    """
    for key in _CVSS_METRIC_KEYS:
        entries = metrics.get(key) or []
        if not entries:
            continue
        first = entries[0]
        cvss_data = first.get('cvssData', {})
        base_score = cvss_data.get('baseScore', 'N/A')
        # V2 entries carry baseSeverity next to cvssData; V3.x/V4.0 entries
        # carry it inside cvssData, so look in both places.
        base_severity = first.get('baseSeverity', cvss_data.get('baseSeverity', 'N/A'))
        return base_score, base_severity
    return 'N/A', 'N/A'


def _cve_to_row(cve):
    """Flatten one item of the 'vulnerabilities' list into a CSV row dict."""
    cve_data = cve.get('cve', {})
    base_score, base_severity = _cvss_base_metrics(cve_data.get('metrics', {}))
    return {
        'cve_id': cve_data.get('id', 'N/A'),
        'description': _english_description(cve_data.get('descriptions', [])),
        'published_date': cve_data.get('published', 'N/A'),
        'last_modified_date': cve_data.get('lastModified', 'N/A'),
        'base_score': base_score,
        'base_severity': base_severity,
    }


# Open the CSV file in write mode and save the data.
# newline='' lets the csv module control line endings.
with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=csv_headers)
    writer.writeheader()
    writer.writerows(_cve_to_row(cve) for cve in all_cves)

logging.info(f"Data successfully saved to {csv_filename}")


2025-01-13 17:28:42,749 - INFO - Fetching data with startIndex=0
2025-01-13 17:28:42,753 - DEBUG - Starting new HTTPS connection (1): services.nvd.nist.gov:443
2025-01-13 17:28:43,474 - DEBUG - send: b'GET /rest/json/cves/2.0?startIndex=0&resultsPerPage=100 HTTP/1.1\r\nHost: services.nvd.nist.gov\r\nUser-Agent: python-requests/2.32.3\r\nAccept-Encoding: gzip, deflate, br\r\nAccept: */*\r\nConnection: keep-alive\r\napiKey: <REDACTED>\r\n\r\n'
2025-01-13 17:28:43,988 - DEBUG - reply: 'HTTP/1.1 200 OK\r\n'
2025-01-13 17:28:43,989 - DEBUG - header: content-type: application/json
2025-01-13 17:28:43,990 - DEBUG - header: x-frame-options: SAMEORIGIN
2025-01-13 17:28:43,990 - DEBUG - header: access-control-allow-origin: *
2025-01-13 17:28:43,990 - DEBUG - header: access-control-allow-headers: accept, apiKey, content-type, origin, x-requested-with
2025-01-13 17:28:43,990 - DEBUG - header: access-control-allow-methods: GET, HEAD, OPTIONS
2025-01-13 17:28:43,991 - DEBUG