# CPE Data
---

In [1]:
from IPython.core.magic import register_cell_magic
from IPython.display import Markdown
import datetime
from datetime import date
import glob
import json
import logging
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly
import warnings
import calplot
from itables import init_notebook_mode, show
import itables.options as opt

# Global itables configuration, set once before any table is rendered.
# NOTE(review): "tpir" looks like a DataTables `dom` control string — confirm
# which controls it is meant to enable; newer itables releases may prefer the
# `layout` option over `dom` (verify against the installed version).
opt.dom = "tpir"
# CSS string assigned to the itables `style` option.
opt.style = "table-layout:auto;width:auto"
# Switch the notebook into itables mode. `all_interactive=True` presumably makes
# every DataFrame/Series render as an interactive table, and `connected=True`
# presumably loads the JS assets from the network — TODO confirm.
init_notebook_mode(all_interactive=True, connected=True)

@register_cell_magic
def markdown(line, cell):
    """Cell magic: render the cell body as Markdown.

    The cell text is passed through ``str.format`` with this module's globals
    as keyword arguments, so ``{name}`` placeholders are replaced by the
    current value of the global ``name``. ``line`` (the text after the magic
    name) is accepted but not used.

    Returns the resulting ``IPython.display.Markdown`` object.
    """
    namespace = globals()
    rendered_text = cell.format(**namespace)
    return Markdown(rendered_text)


# Disable matplotlib's font-manager logger and suppress all Python warnings
# for the rest of the notebook.
font_logger = logging.getLogger('matplotlib.font_manager')
font_logger.disabled = True
warnings.filterwarnings("ignore")

# pandas text-display limits, applied in the order listed.
for option_name, option_value in {
    'display.width': 500,
    'display.max_rows': 50,
    'display.max_columns': 10,
}.items():
    pd.set_option(option_name, option_value)

In [2]:
# Column order of the flattened CPE table. Passing it to the DataFrame
# constructor guarantees the columns exist even when no rows were collected.
CPE_COLUMNS = ['CVE', 'Published', 'CPE', 'StartI', 'StartE', 'EndI', 'EndE']


def extract_cpe_rows(nvd_entries):
    """Flatten NVD records into one dict per vulnerable CPE match.

    Parameters
    ----------
    nvd_entries : iterable of dict
        Records shaped like ``{'cve': {'id': ..., 'published': ...,
        'configurations': [{'nodes': [{'cpeMatch': [...]}]}]}}``.

    Returns
    -------
    list of dict
        One row per ``cpeMatch`` item whose ``'vulnerable'`` value is truthy,
        with the keys listed in ``CPE_COLUMNS``:

        * ``StartI`` / ``StartE`` - ``versionStartIncluding`` / ``versionStartExcluding``
        * ``EndI`` / ``EndE`` - ``versionEndIncluding`` / ``versionEndExcluding``

        A missing version bound is stored as the string ``'None'`` and a
        missing publication date as ``'Missing_Data'``.

    Raises
    ------
    KeyError
        If a record lacks ``'cve'``/``'id'``, a configuration lacks
        ``'nodes'``, a node lacks ``'cpeMatch'``, or a match lacks
        ``'vulnerable'``/``'criteria'``.
    """
    rows = []
    for entry in nvd_entries:
        cve_record = entry['cve']
        # Records without a 'configurations' key contribute no rows.
        for config in cve_record.get('configurations', []):
            for node in config['nodes']:
                for cpe in node['cpeMatch']:
                    if not cpe['vulnerable']:
                        continue
                    rows.append({
                        'CVE': cve_record['id'],
                        'Published': cve_record.get('published', 'Missing_Data'),
                        'CPE': cpe['criteria'],
                        # I = including bound, E = excluding bound.
                        'StartI': cpe.get('versionStartIncluding', 'None'),
                        'StartE': cpe.get('versionStartExcluding', 'None'),
                        'EndI': cpe.get('versionEndIncluding', 'None'),
                        'EndE': cpe.get('versionEndExcluding', 'None'),
                    })
    return rows


row_accumulator = []

# Despite the .jsonl extension, each matching file is parsed as a single JSON
# document (json.load), i.e. it is expected to hold one list of records.
for filename in glob.glob('nvd.jsonl'):
    with open(filename, 'r', encoding='utf-8') as f:
        nvd_data = json.load(f)
    row_accumulator.extend(extract_cpe_rows(nvd_data))

nvd = pd.DataFrame(row_accumulator, columns=CPE_COLUMNS)

# 'Published' is compared as a string, so this relies on ISO-8601 timestamps
# sorting lexicographically. Rows marked 'Missing_Data' fail the upper bound
# and are dropped.
# NOTE(review): the window is hard-coded and excludes anything published on or
# after 2025-01-01 - confirm this is still the intended cut-off.
thisyear = ((nvd['Published'] > '2000-01-01') & (nvd['Published'] < '2025-01-01'))
nvd = nvd.loc[thisyear]
nvd = nvd.sort_values(by=['Published'])

## CPE Data

### CVEs With Most CPEs

In [3]:
# Row counts per CVE id (one row per vulnerable CPE match), top 20 only.
top_cves = nvd['CVE'].value_counts().head(20)
show(top_cves, scrollY="400px", scrollCollapse=True, paging=False)

Unnamed: 0_level_0,count
CVE,Unnamed: 1_level_1
Loading ITables v2.2.4 from the internet... (need help?),


### Most Common CPEs

In [4]:
# Row counts per CPE criteria string, top 20 only.
top_cpes = nvd['CPE'].value_counts().head(20)
show(top_cpes, scrollY="400px", scrollCollapse=True, paging=False)

Unnamed: 0_level_0,count
CPE,Unnamed: 1_level_1
Loading ITables v2.2.4 from the internet... (need help?),


### Unique CPEs

In [5]:
# Number of distinct CPE criteria strings in the filtered table.
unique_cpe_count = nvd['CPE'].nunique()
unique_cpe_count

347966

In [6]:
# Timestamp taken at cell execution time (naive datetime from datetime.now()).
generated_at = datetime.datetime.now()
Markdown(f"This report is updated automatically every day, last generated on: **{generated_at}**")

This report is updated automatically every day, last generated on: **2025-01-25 12:23:29.988991**