# Import Libraries

In [None]:
import os
import shutil
import subprocess
from datetime import datetime, timedelta

import requests
from tqdm import tqdm

# Download NVD JSONL File

In [2]:
# Fetch the NVD JSONL dump into ./NVD, re-using a local copy that is less than
# 24 hours old so that "Run All" does not re-download the large file each time.
url = 'https://nvd.handsonhacking.org/nvd.jsonl'
nvd_folder = './NVD'
nvd_file_path = os.path.join(nvd_folder, 'nvd.jsonl')

# Create the NVD folder if it doesn't exist
os.makedirs(nvd_folder, exist_ok=True)

# The local copy counts as fresh when it exists and was modified within 24 hours
nvd_is_fresh = False
if os.path.exists(nvd_file_path):
    file_mod_time = datetime.fromtimestamp(os.path.getmtime(nvd_file_path))
    nvd_is_fresh = datetime.now() - file_mod_time <= timedelta(hours=24)

if nvd_is_fresh:
    print(f'{nvd_file_path} is less than 24 hours old, skipping download')
else:
    # Download next to the target and only move it into place once complete,
    # so an interrupted transfer never leaves a truncated nvd.jsonl behind.
    partial_path = nvd_file_path + '.part'
    try:
        with requests.get(url, stream=True, timeout=30) as response:
            # Fail loudly on 4xx/5xx instead of saving the error page as data
            response.raise_for_status()
            total_size = int(response.headers.get('content-length', 0))
            if response.headers.get('content-encoding'):
                # Content-Length counts encoded bytes while iter_content yields
                # decoded bytes, so the two cannot be compared in this case.
                total_size = 0
            block_size = 1024 * 1024
            with open(partial_path, 'wb') as file, tqdm(
                total=total_size or None, unit='iB', unit_scale=True
            ) as tqdm_bar:
                for data in response.iter_content(block_size):
                    tqdm_bar.update(len(data))
                    file.write(data)
                bytes_received = tqdm_bar.n
        if total_size != 0 and bytes_received != total_size:
            print('ERROR: Something went wrong')
        else:
            os.replace(partial_path, nvd_file_path)
            print(f'Downloaded {nvd_file_path}')
    finally:
        # Drop the partial file after a failed or incomplete transfer
        if os.path.exists(partial_path):
            os.remove(partial_path)

100%|██████████| 1.28G/1.28G [00:38<00:00, 33.6MiB/s]

Downloaded ./NVD/nvd.jsonl





# Download CVE Files from GitHub Repository

In [3]:
# Keep a local checkout of the CVE list in ./CVE: clone it when the folder is
# missing or empty, otherwise pull. If git fails, start over from a clean clone.
repo_url = 'https://github.com/CVEProject/cvelistV5.git'
cve_folder = './CVE'

# Clone the repository if the folder doesn't exist, otherwise pull the latest changes
try:
    if not os.path.exists(cve_folder) or not os.listdir(cve_folder):
        if os.path.exists(cve_folder):
            shutil.rmtree(cve_folder)
        subprocess.run(['git', 'clone', '-q', repo_url, cve_folder], check=True)
    else:
        subprocess.run(['git', '-C', cve_folder, 'pull', '-q'], check=True)
    print('Repository is up to date')
except subprocess.CalledProcessError:
    print('Error occurred, removing the existing checkout and re-cloning the repository')
    # git refuses to clone into a non-empty directory, so the whole folder
    # (not only its .git sub-folder) has to go before the retry.
    if os.path.exists(cve_folder):
        shutil.rmtree(cve_folder)
    subprocess.run(['git', 'clone', '-q', repo_url, cve_folder], check=True)
    print('Repository re-cloned successfully')

Repository is up to date


# Download Schema Files

In [4]:
# Download the CVE and NVD JSON schema files into ./schemas, one file per URL,
# named after the last path segment of the URL.
schema_folder = './schemas'
os.makedirs(schema_folder, exist_ok=True)

schema_urls = [
    'https://raw.githubusercontent.com/CVEProject/cve-schema/main/schema/CVE_Record_Format.json',
    'https://csrc.nist.gov/schema/nvd/api/2.0/cve_api_json_2.0.schema'
]

# A dedicated loop variable keeps the NVD cell's `url` from being overwritten
for schema_url in schema_urls:
    file_name = schema_url.split('/')[-1]
    file_path = os.path.join(schema_folder, file_name)
    response = requests.get(schema_url, timeout=30)
    # Stop on 4xx/5xx rather than saving an error page as a schema file
    response.raise_for_status()
    with open(file_path, 'wb') as file:
        file.write(response.content)
    print(f'Downloaded {file_path}')


Downloaded ./schemas/CVE_Record_Format.json
Downloaded ./schemas/cve_api_json_2.0.schema
