# Import Libraries

In [2]:
import os
import shutil
import subprocess

import requests
from tqdm import tqdm


# Download NVD JSONL File

In [3]:
# Download the NVD JSONL dump into ./NVD/nvd.jsonl, showing a progress bar.
# The data is streamed into a sibling ".part" file and only moved over the
# real file once the transfer has finished, so a failed or interrupted
# download never destroys the copy from a previous run.
url = 'https://nvd.handsonhacking.org/nvd.jsonl'
nvd_folder = './NVD'
nvd_file_path = os.path.join(nvd_folder, 'nvd.jsonl')
nvd_tmp_path = nvd_file_path + '.part'

# Create the NVD folder if it doesn't exist
os.makedirs(nvd_folder, exist_ok=True)

# 1 MiB chunks keep the Python-level loop short for a multi-gigabyte file
block_size = 1024 * 1024

# timeout=(connect, read) in seconds: without it a stalled server hangs the cell forever
with requests.get(url, stream=True, timeout=(10, 60)) as response:
    # Fail loudly on 4xx/5xx instead of saving an error page as nvd.jsonl
    response.raise_for_status()
    total_size = int(response.headers.get('content-length', 0))
    # iter_content yields decoded bytes, so Content-Length is only comparable
    # with the number of bytes written when the body is not compressed
    size_is_checkable = total_size != 0 and 'content-encoding' not in response.headers

    # total=None makes tqdm show a plain counter when the size is unknown
    with open(nvd_tmp_path, 'wb') as file, \
            tqdm(total=total_size or None, unit='iB', unit_scale=True) as tqdm_bar:
        for data in response.iter_content(block_size):
            tqdm_bar.update(len(data))
            file.write(data)
        downloaded_size = tqdm_bar.n

if size_is_checkable and downloaded_size != total_size:
    # The incomplete ".part" file is left in place (it is overwritten on the
    # next run); the previously downloaded nvd.jsonl, if any, is untouched.
    print('ERROR: Something went wrong')
else:
    # Replace the old file only after a complete download
    os.replace(nvd_tmp_path, nvd_file_path)
    print(f'Downloaded {nvd_file_path}')


100%|██████████| 1.28G/1.28G [00:28<00:00, 45.0MiB/s]

Downloaded ./NVD/nvd.jsonl





# Download CVE Files from GitHub Repository

In [4]:
# Keep a local checkout of the CVE list repository in ./CVE:
# clone it when the folder is missing or empty, otherwise pull the latest
# changes. If either git command fails, start over from a fresh clone.
repo_url = 'https://github.com/CVEProject/cvelistV5.git'
cve_folder = './CVE'

try:
    if not os.path.exists(cve_folder) or not os.listdir(cve_folder):
        if os.path.exists(cve_folder):
            # shutil.rmtree instead of shelling out to `rm -rf`: portable,
            # and a failure to delete raises instead of being ignored
            shutil.rmtree(cve_folder)
        subprocess.run(['git', 'clone', repo_url, cve_folder], check=True)
    else:
        subprocess.run(['git', '-C', cve_folder, 'pull'], check=True)
    print('Repository is up to date')
except subprocess.CalledProcessError:
    print('Error occurred, removing the CVE folder and re-cloning the repository')
    # The whole working tree has to go, not just .git: `git clone` refuses to
    # write into an existing non-empty directory, so leaving the checked-out
    # files behind would make this recovery clone fail as well.
    if os.path.exists(cve_folder):
        shutil.rmtree(cve_folder)
    subprocess.run(['git', 'clone', repo_url, cve_folder], check=True)
    print('Repository re-cloned successfully')


From https://github.com/CVEProject/cvelistV5
   07bf9a9aa20..333e3da91f4  main       -> origin/main
 * [new tag]                 2025-03-06_baseline -> 2025-03-06_baseline
 * [new tag]                 cve_2025-03-05_2100Z -> cve_2025-03-05_2100Z
 * [new tag]                 cve_2025-03-05_2200Z -> cve_2025-03-05_2200Z
 * [new tag]                 cve_2025-03-05_2300Z -> cve_2025-03-05_2300Z
 * [new tag]                 cve_2025-03-05_at_end_of_day -> cve_2025-03-05_at_end_of_day
 * [new tag]                 cve_2025-03-06_0000Z -> cve_2025-03-06_0000Z
 * [new tag]                 cve_2025-03-06_0100Z -> cve_2025-03-06_0100Z
 * [new tag]                 cve_2025-03-06_0200Z -> cve_2025-03-06_0200Z
 * [new tag]                 cve_2025-03-06_0300Z -> cve_2025-03-06_0300Z
 * [new tag]                 cve_2025-03-06_0400Z -> cve_2025-03-06_0400Z
 * [new tag]                 cve_2025-03-06_0500Z -> cve_2025-03-06_0500Z
 * [new tag]                 cve_2025-03-06_0600Z -> cve_2025-03-06_0600

Updating 426b8c75f55..333e3da91f4
Fast-forward
 cves/2019/20xxx/CVE-2019-20208.json |     22 +-
 cves/2020/24xxx/CVE-2020-24829.json |     19 +-
 cves/2020/5xxx/CVE-2020-5026.json   |     34 +-
 cves/2021/20xxx/CVE-2021-20251.json |     40 +-
 cves/2021/30xxx/CVE-2021-30014.json |     19 +-
 cves/2021/30xxx/CVE-2021-30022.json |     19 +-
 cves/2021/33xxx/CVE-2021-33351.json |     68 +-
 cves/2021/33xxx/CVE-2021-33352.json |     68 +-
 cves/2021/35xxx/CVE-2021-35377.json |     40 +-
 cves/2021/36xxx/CVE-2021-36392.json |     40 +-
 cves/2021/36xxx/CVE-2021-36393.json |     40 +-
 cves/2021/36xxx/CVE-2021-36394.json |     52 +-
 cves/2021/3xxx/CVE-2021-3854.json   |     34 +-
 cves/2021/40xxx/CVE-2021-40574.json |     20 +-
 cves/2021/45xxx/CVE-2021-45477.json |     34 +-
 cves/2021/45xxx/CVE-2021-45478.json |     34 +-
 cves/2021/4xxx/CVE-2021-4328.json   |     34 +-
 cves/2022/1xxx/CVE-2022-1586.json   |     39 +-
 cves/2022/33xxx/CVE-2022-33256.json |     34 +-
 cves/2022/43xxx/CVE-2

# Download Schema Files

In [5]:
# Download the JSON schema files listed in schema_urls into ./schemas.
# Each file is saved under the last path segment of its URL.
schema_folder = './schemas'
os.makedirs(schema_folder, exist_ok=True)

schema_urls = [
    'https://raw.githubusercontent.com/CVEProject/cve-schema/main/schema/CVE_Record_Format.json',
    'https://csrc.nist.gov/schema/nvd/api/2.0/cve_api_json_2.0.schema'
]

for url in schema_urls:
    file_name = url.split('/')[-1]
    file_path = os.path.join(schema_folder, file_name)
    # timeout=(connect, read) in seconds so a stalled server cannot hang the cell
    with requests.get(url, timeout=(10, 60)) as response:
        # Stop on 4xx/5xx: otherwise the error page body would be written to
        # disk as the schema and reported as a successful download
        response.raise_for_status()
        with open(file_path, 'wb') as file:
            file.write(response.content)
    print(f'Downloaded {file_path}')


Downloaded ./schemas/CVE_Record_Format.json
Downloaded ./schemas/cve_api_json_2.0.schema
