# NVD Data Frame

In [1]:
import json
import pandas as pd
import glob
import re
import numpy as np 

## Load and Parse CVE Data

In [2]:
def get_nested_value(entry, keys, default='Missing_Data'):
    """Follow ``keys`` through nested containers and return the value reached.

    Parameters
    ----------
    entry : object
        The outermost container to start from (indexed with ``keys[0]``).
    keys : iterable
        Successive subscripts, applied one after another (``entry[k0][k1]...``).
    default : object, optional
        Value returned when the path cannot be followed to the end.

    Returns
    -------
    object
        The value at the end of the path, or ``default`` when any step fails.
    """
    current = entry
    try:
        for key in keys:
            current = current[key]
    # TypeError covers steps that land on a non-subscriptable value (e.g. None)
    # or that use a string key on a list; these count as "missing" as well.
    except (KeyError, IndexError, TypeError):
        return default
    return current

# Shared path prefixes: every CVSS field is read from the first cvssMetricV31 entry.
CVSS_V31_METRIC = ['cve', 'metrics', 'cvssMetricV31', 0]
CVSS_V31_DATA = CVSS_V31_METRIC + ['cvssData']

# Output column -> positional arguments forwarded to get_nested_value after the
# record itself: (key_path,) uses the helper's own default, (key_path, fallback)
# overrides it. Insertion order here is the column order of the resulting rows.
COLUMN_LOOKUPS = {
    'CVE': (['cve', 'id'],),
    'Published': (['cve', 'published'],),
    'AttackVector': (CVSS_V31_DATA + ['attackVector'],),
    'AttackComplexity': (CVSS_V31_DATA + ['attackComplexity'],),
    'PrivilegesRequired': (CVSS_V31_DATA + ['privilegesRequired'],),
    'UserInteraction': (CVSS_V31_DATA + ['userInteraction'],),
    'Scope': (CVSS_V31_DATA + ['scope'],),
    'ConfidentialityImpact': (CVSS_V31_DATA + ['confidentialityImpact'],),
    'IntegrityImpact': (CVSS_V31_DATA + ['integrityImpact'],),
    'AvailabilityImpact': (CVSS_V31_DATA + ['availabilityImpact'],),
    'BaseScore': (CVSS_V31_DATA + ['baseScore'], '0.0'),
    'BaseSeverity': (CVSS_V31_DATA + ['baseSeverity'],),
    'ExploitabilityScore': (CVSS_V31_METRIC + ['exploitabilityScore'],),
    'ImpactScore': (CVSS_V31_METRIC + ['impactScore'],),
    'CWE': (['cve', 'weaknesses', 0, 'description', 0, 'value'],),
    'Description': (['cve', 'descriptions', 0, 'value'], ''),
    'Assigner': (['cve', 'sourceIdentifier'],),
    'Tag': (['cve', 'cveTags', 0, 'tags'], np.nan),
    'Status': (['cve', 'vulnStatus'], ''),
}

# Collect one flat dict per record; the DataFrame is built from this list later.
row_accumulator = []
for filename in glob.glob('../Data/NVD/nvd.jsonl'):
    with open(filename, 'r', encoding='utf-8') as f:
        nvd_data = json.load(f)
    # The file is fully parsed at this point, so the handle is no longer needed.
    for entry in nvd_data:
        new_row = {
            column: get_nested_value(entry, *lookup_args)
            for column, lookup_args in COLUMN_LOOKUPS.items()
        }
        row_accumulator.append(new_row)

# Build the frame once from the accumulated records and clean it in one chain:
#   1. drop rows whose Status contains 'Rejected',
#   2. parse Published as datetimes and order rows chronologically,
#   3. renumber the index after filtering/sorting,
#   4. convert BaseScore to numeric (once) and turn 0 into NaN. Rows without a
#      baseScore were loaded with the placeholder '0.0', so 0 means "no score".
nvd = (
    pd.DataFrame(row_accumulator)
    .loc[lambda df: ~df['Status'].str.contains('Rejected')]
    .assign(Published=lambda df: pd.to_datetime(df['Published']))
    .sort_values(by=['Published'])
    .reset_index(drop=True)
    .assign(BaseScore=lambda df: pd.to_numeric(df['BaseScore']).replace(0, np.nan))
)


In [3]:
# Bare last expression: renders the cleaned frame as this cell's output.
nvd

Unnamed: 0,CVE,Published,AttackVector,AttackComplexity,PrivilegesRequired,UserInteraction,Scope,ConfidentialityImpact,IntegrityImpact,AvailabilityImpact,BaseScore,BaseSeverity,ExploitabilityScore,ImpactScore,CWE,Description,Assigner,Tag,Status
0,CVE-1999-0095,1988-10-01 04:00:00.000,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,,Missing_Data,Missing_Data,Missing_Data,NVD-CWE-Other,"The debug command in Sendmail is enabled, allo...",cve@mitre.org,,Modified
1,CVE-1999-0082,1988-11-11 05:00:00.000,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,,Missing_Data,Missing_Data,Missing_Data,NVD-CWE-Other,CWD ~root command in ftpd allows root access.,cve@mitre.org,,Modified
2,CVE-1999-1471,1989-01-01 05:00:00.000,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,,Missing_Data,Missing_Data,Missing_Data,NVD-CWE-Other,Buffer overflow in passwd in BSD based operati...,cve@mitre.org,,Modified
3,CVE-1999-1122,1989-07-26 04:00:00.000,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,,Missing_Data,Missing_Data,Missing_Data,NVD-CWE-Other,Vulnerability in restore in SunOS 4.0.3 and ea...,cve@mitre.org,,Modified
4,CVE-1999-1467,1989-10-26 04:00:00.000,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,,Missing_Data,Missing_Data,Missing_Data,NVD-CWE-Other,Vulnerability in rcp on SunOS 4.0.x allows rem...,cve@mitre.org,,Modified
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
273133,CVE-2025-21986,2025-04-01 16:15:30.010,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,,Missing_Data,Missing_Data,Missing_Data,Missing_Data,"In the Linux kernel, the following vulnerabili...",416baaa9-dc9f-4396-8d5f-8c081fb06d67,,Received
273134,CVE-2025-25041,2025-04-01 17:15:44.967,LOCAL,LOW,LOW,NONE,UNCHANGED,NONE,HIGH,NONE,5.5,MEDIUM,1.8,3.6,Missing_Data,A vulnerability in the HPE Aruba Networking Vi...,security-alert@hpe.com,,Received
273135,CVE-2025-27829,2025-04-01 17:15:46.303,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,,Missing_Data,Missing_Data,Missing_Data,Missing_Data,An issue was discovered in Stormshield Network...,cve@mitre.org,,Received
273136,CVE-2025-28131,2025-04-01 17:15:46.493,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,,Missing_Data,Missing_Data,Missing_Data,Missing_Data,A Broken Access Control vulnerability in Nagio...,cve@mitre.org,,Received
