In [1]:
!pip install boto3




In [2]:
import requests
import json
import os
from datetime import datetime, timedelta
import time
import boto3
from botocore.exceptions import ClientError

In [3]:
# Use this code snippet in your app.
# If you need more information about configurations
# or implementing the sample code, visit the AWS docs:
# https://aws.amazon.com/developer/language/python/

def get_secret():

    secret_name = "NVD_API"
    region_name = "us-east-1"

    # Create a Secrets Manager client
    session = boto3.session.Session()
    client = session.client(
        service_name='secretsmanager',
        region_name=region_name
    )

    try:
        get_secret_value_response = client.get_secret_value(
            SecretId=secret_name
        )
    except ClientError as e:
        # For a list of exceptions thrown, see
        # https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_GetSecretValue.html
        raise e

    secret = get_secret_value_response['SecretString']

In [4]:

# Define constants
API_KEY = get_secret()
BASE_URL = 'https://services.nvd.nist.gov/rest/json/cves/2.0'
HEADERS = {'apiKey': API_KEY}
RESULTS_PER_PAGE = 100  # NVD's recommended value to balance the load
OUTPUT_DIR = 'NVD_CVE_7-23'
SLEEP_TIME = 6  # Delay in seconds between requests

# Create output directory if not exists
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

# Define the date range for the past month
end_date = datetime.utcnow()
start_date = end_date - timedelta(days=30)

# Format dates to ISO 8601 format
start_date_str = start_date.strftime('%Y-%m-%dT%H:%M:%S.000Z')
end_date_str = end_date.strftime('%Y-%m-%dT%H:%M:%S.000Z')

# Initialize pagination parameters
start_index = 0
total_results = 1  # Initialize to a non-zero value to enter the loop

# Function to save a CVE record to a file
def save_cve_record(cve_record, output_dir):
    cve_id = cve_record['cve']['id']
    with open(f'{output_dir}/{cve_id}.json', 'w') as file:
        json.dump(cve_record, file, indent=2)

# Retrieve and save CVE records
while start_index < total_results:
    params = {
        'startIndex': start_index,
        'resultsPerPage': RESULTS_PER_PAGE,
        'pubStartDate': start_date_str,
        'pubEndDate': end_date_str,
        'noRejected': None,
        'cvssV3Severity': 'HIGH'  # Filter for high severity
    }
    response = requests.get(BASE_URL, headers=HEADERS, params=params)
    
    if response.status_code == 200:
        try:
            data = response.json()
        except json.JSONDecodeError:
            print(f'Error decoding JSON response at index {start_index}.')
            print('Response text:', response.text)
            break
        
        # Update pagination info
        total_results = data.get('totalResults', 0)
        start_index += RESULTS_PER_PAGE
        
        # Save each CVE record
        for cve in data.get('vulnerabilities', []):
            save_cve_record(cve, OUTPUT_DIR)
        
        print(f'Retrieved {len(data.get("vulnerabilities", []))} CVE records. Total so far: {start_index}.')
    else:
        print(f'Failed to retrieve data: {response.status_code}')
        print('Response text:', response.text)
        break
    
    # Sleep for 6 seconds before the next request
    time.sleep(SLEEP_TIME)

print('Data retrieval and storage complete.')


Retrieved 100 CVE records. Total so far: 100.
Retrieved 100 CVE records. Total so far: 200.
Retrieved 100 CVE records. Total so far: 300.
Retrieved 72 CVE records. Total so far: 400.
Data retrieval and storage complete.
