In [1]:
# Listing JSON files
from os import listdir
from os.path import isfile, join
path = join('data', 'extracted')
# listdir() returns entries in arbitrary order and includes every kind of file,
# so keep only regular *.json files (the loader below parses each one as JSON)
# and sort the names to get a reproducible processing order
data_files = sorted(
    f for f in listdir(path)
    if f.lower().endswith('.json') and isfile(join(path, f))
)
print('{} files identified'.format(len(data_files)))
for item in data_files:
    print(item)

19 files identified
nvdcve-1.1-2002.json
nvdcve-1.1-2003.json
nvdcve-1.1-2004.json
nvdcve-1.1-2005.json
nvdcve-1.1-2006.json
nvdcve-1.1-2007.json
nvdcve-1.1-2008.json
nvdcve-1.1-2009.json
nvdcve-1.1-2010.json
nvdcve-1.1-2011.json
nvdcve-1.1-2012.json
nvdcve-1.1-2013.json
nvdcve-1.1-2014.json
nvdcve-1.1-2015.json
nvdcve-1.1-2016.json
nvdcve-1.1-2017.json
nvdcve-1.1-2018.json
nvdcve-1.1-2019.json
nvdcve-1.1-2020.json


In [2]:
# Read every listed file and gather all of its CVE records in memory
import json
full_list = []
total_cves = 0
for data_file in data_files:
    file_path = join(path, data_file)
    print('Processing {}'.format(file_path))
    # Parse the file, then release the handle before working on the data
    with open(file_path, encoding='utf-8') as fh:
        feed = json.load(fh)
    # The records are read from the top-level "CVE_Items" key; a file
    # without it is treated as an error
    contents = feed.get("CVE_Items")
    if contents is None:
        raise ValueError('CVE_Items not found in {}'.format(file_path))
    cve_count = len(contents)
    print('Found {} CVEs in {}'.format(cve_count, data_file))
    # Keep a running tally alongside the combined list of records
    total_cves += cve_count
    full_list.extend(contents)

Processing data\extracted\nvdcve-1.1-2002.json
Found 6748 CVEs in nvdcve-1.1-2002.json
Processing data\extracted\nvdcve-1.1-2003.json
Found 1547 CVEs in nvdcve-1.1-2003.json
Processing data\extracted\nvdcve-1.1-2004.json
Found 2703 CVEs in nvdcve-1.1-2004.json
Processing data\extracted\nvdcve-1.1-2005.json
Found 4758 CVEs in nvdcve-1.1-2005.json
Processing data\extracted\nvdcve-1.1-2006.json
Found 7134 CVEs in nvdcve-1.1-2006.json
Processing data\extracted\nvdcve-1.1-2007.json
Found 6569 CVEs in nvdcve-1.1-2007.json
Processing data\extracted\nvdcve-1.1-2008.json
Found 7155 CVEs in nvdcve-1.1-2008.json
Processing data\extracted\nvdcve-1.1-2009.json
Found 4996 CVEs in nvdcve-1.1-2009.json
Processing data\extracted\nvdcve-1.1-2010.json
Found 5168 CVEs in nvdcve-1.1-2010.json
Processing data\extracted\nvdcve-1.1-2011.json
Found 4806 CVEs in nvdcve-1.1-2011.json
Processing data\extracted\nvdcve-1.1-2012.json
Found 5789 CVEs in nvdcve-1.1-2012.json
Processing data\extracted\nvdcve-1.1-2013.j

In [3]:
# Report the count twice: first from the running tally, then from the length
# of the combined list; both figures are accumulated from the same records
summary_template = 'Total CVEs documented: {}'
print(summary_template.format(total_cves))
print(summary_template.format(len(full_list)))

Total CVEs documented: 143465
Total CVEs documented: 143465


In [4]:
# Inspect the 'cve' section of one sample record (index 3)
# NOTE(review): the saved output below still shows the whole record (it also
# contains 'configurations'), so it appears to predate the ['cve'] subscript;
# re-run this cell to refresh it
full_list[3]['cve']

{'cve': {'data_type': 'CVE',
  'data_format': 'MITRE',
  'data_version': '4.0',
  'CVE_data_meta': {'ID': 'CVE-1999-0004', 'ASSIGNER': 'cve@mitre.org'},
  'problemtype': {'problemtype_data': [{'description': [{'lang': 'en',
       'value': 'NVD-CWE-Other'}]}]},
  'references': {'reference_data': [{'url': 'https://docs.microsoft.com/en-us/security-updates/securitybulletins/1998/ms98-008',
     'name': 'MS98-008',
     'refsource': 'MS',
     'tags': []}]},
  'description': {'description_data': [{'lang': 'en',
     'value': 'MIME buffer overflow in email clients, e.g. Solaris mailtool and Outlook.'}]}},
 'configurations': {'CVE_data_version': '4.0',
  'nodes': [{'operator': 'OR',
    'cpe_match': [{'vulnerable': True,
      'cpe23Uri': 'cpe:2.3:a:hp:dtmail:*:*:*:*:*:*:*:*'},
     {'vulnerable': True,
      'cpe23Uri': 'cpe:2.3:a:university_of_washington:pine:4.02:*:*:*:*:*:*:*'}]},
   {'operator': 'OR',
    'cpe_match': [{'vulnerable': True,
      'cpe23Uri': 'cpe:2.3:o:sco:unixware:7.0:

In [5]:
import pandas as pd

In [6]:
# Build a DataFrame with one row per CVE record; the top-level keys of each
# record dict become the columns
df_full = pd.DataFrame(full_list)

In [7]:
# Preview the first rows; the nested sections (cve, configurations, impact)
# are still dict-valued at this stage
df_full.head()

Unnamed: 0,cve,configurations,impact,publishedDate,lastModifiedDate
0,"{'data_type': 'CVE', 'data_format': 'MITRE', '...","{'CVE_data_version': '4.0', 'nodes': [{'operat...","{'baseMetricV2': {'cvssV2': {'version': '2.0',...",1999-12-30T05:00Z,2010-12-16T05:00Z
1,"{'data_type': 'CVE', 'data_format': 'MITRE', '...","{'CVE_data_version': '4.0', 'nodes': [{'operat...","{'baseMetricV2': {'cvssV2': {'version': '2.0',...",1998-10-12T04:00Z,2009-01-26T05:00Z
2,"{'data_type': 'CVE', 'data_format': 'MITRE', '...","{'CVE_data_version': '4.0', 'nodes': [{'operat...","{'baseMetricV2': {'cvssV2': {'version': '2.0',...",1998-04-01T05:00Z,2018-10-30T16:26Z
3,"{'data_type': 'CVE', 'data_format': 'MITRE', '...","{'CVE_data_version': '4.0', 'nodes': [{'operat...","{'baseMetricV2': {'cvssV2': {'version': '2.0',...",1997-12-16T05:00Z,2018-10-12T21:29Z
4,"{'data_type': 'CVE', 'data_format': 'MITRE', '...","{'CVE_data_version': '4.0', 'nodes': [{'operat...","{'baseMetricV2': {'cvssV2': {'version': '2.0',...",1998-07-20T04:00Z,2008-09-09T12:33Z


In [12]:
# Select the 'cve' column: a Series whose values are the nested 'cve' dicts
cves = df_full['cve']

In [13]:
# Convert the 'cve' column to a plain list of dicts. It is derived directly
# from df_full rather than from `cves` itself so that re-running this cell is
# safe: once `cves` is a list it has no .tolist(), so the self-referential form
# fails on a second run
cves = df_full['cve'].tolist()

In [11]:
# Preview the 'cve' column. It is read from df_full because, in top-to-bottom
# order, `cves` has already been converted to a list by this point and a list
# has no .head() (the original cell only worked when run out of order)
df_full['cve'].head()

Unnamed: 0,cve
0,"{'data_type': 'CVE', 'data_format': 'MITRE', '..."
1,"{'data_type': 'CVE', 'data_format': 'MITRE', '..."
2,"{'data_type': 'CVE', 'data_format': 'MITRE', '..."
3,"{'data_type': 'CVE', 'data_format': 'MITRE', '..."
4,"{'data_type': 'CVE', 'data_format': 'MITRE', '..."


In [15]:
# Expand the list of 'cve' dicts into its own DataFrame: one row per CVE and
# one column per key of the 'cve' dict
df_cves = pd.DataFrame(cves)

In [16]:
# Preview the first rows; CVE_data_meta, problemtype, references and
# description remain nested dicts
df_cves.head()

Unnamed: 0,data_type,data_format,data_version,CVE_data_meta,problemtype,references,description
0,CVE,MITRE,4.0,"{'ID': 'CVE-1999-0001', 'ASSIGNER': 'cve@mitre...",{'problemtype_data': [{'description': [{'lang'...,{'reference_data': [{'url': 'http://www.openbs...,"{'description_data': [{'lang': 'en', 'value': ..."
1,CVE,MITRE,4.0,"{'ID': 'CVE-1999-0002', 'ASSIGNER': 'cve@mitre...",{'problemtype_data': [{'description': [{'lang'...,{'reference_data': [{'url': 'ftp://patches.sgi...,"{'description_data': [{'lang': 'en', 'value': ..."
2,CVE,MITRE,4.0,"{'ID': 'CVE-1999-0003', 'ASSIGNER': 'cve@mitre...",{'problemtype_data': [{'description': [{'lang'...,{'reference_data': [{'url': 'ftp://patches.sgi...,"{'description_data': [{'lang': 'en', 'value': ..."
3,CVE,MITRE,4.0,"{'ID': 'CVE-1999-0004', 'ASSIGNER': 'cve@mitre...",{'problemtype_data': [{'description': [{'lang'...,{'reference_data': [{'url': 'https://docs.micr...,"{'description_data': [{'lang': 'en', 'value': ..."
4,CVE,MITRE,4.0,"{'ID': 'CVE-1999-0005', 'ASSIGNER': 'cve@mitre...",{'problemtype_data': [{'description': [{'lang'...,{'reference_data': [{'url': 'http://sunsolve.s...,"{'description_data': [{'lang': 'en', 'value': ..."


In [17]:
# Show the raw 'cve' dict of record 3 again -- presumably to compare it with
# row 3 of df_cves above (both are CVE-1999-0004)
full_list[3]['cve']

{'data_type': 'CVE',
 'data_format': 'MITRE',
 'data_version': '4.0',
 'CVE_data_meta': {'ID': 'CVE-1999-0004', 'ASSIGNER': 'cve@mitre.org'},
 'problemtype': {'problemtype_data': [{'description': [{'lang': 'en',
      'value': 'NVD-CWE-Other'}]}]},
 'references': {'reference_data': [{'url': 'https://docs.microsoft.com/en-us/security-updates/securitybulletins/1998/ms98-008',
    'name': 'MS98-008',
    'refsource': 'MS',
    'tags': []}]},
 'description': {'description_data': [{'lang': 'en',
    'value': 'MIME buffer overflow in email clients, e.g. Solaris mailtool and Outlook.'}]}}