## 2025 YOY Growth Data

In [1]:
%%capture
#!rm *.jsonl  
#!wget -q https://nvd.handsonhacking.org/nvd.jsonl

In [None]:
# Import necessary libraries
from IPython.core.magic import register_cell_magic
from IPython.display import Markdown
from datetime import date
import calplot
import glob
import json
import logging
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import warnings

# Register a cell magic for Markdown
@register_cell_magic
def markdown(line, cell):
    return Markdown(cell.format(**globals()))

# Configure logging and warnings
logging.getLogger('matplotlib.font_manager').disabled = True
warnings.filterwarnings("ignore")

# Set pandas display options
pd.set_option('display.width', 500)
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 10)

In [3]:
def get_nested_value(entry, keys, default='Missing_Data'):
    try:
        for key in keys:
            entry = entry[key]
        return entry
    except (KeyError, IndexError):
        return default

row_accumulator = []
for filename in glob.glob('nvd.jsonl'):
    with open(filename, 'r', encoding='utf-8') as f:
        nvd_data = json.load(f)
        for entry in nvd_data:
            new_row = {
                'CVE': get_nested_value(entry, ['cve', 'id']),
                'Published': get_nested_value(entry, ['cve', 'published']),
                'AttackVector': get_nested_value(entry, ['cve', 'metrics', 'cvssMetricV31', 0, 'cvssData', 'attackVector']),
                'AttackComplexity': get_nested_value(entry, ['cve', 'metrics', 'cvssMetricV31', 0, 'cvssData', 'attackComplexity']),
                'PrivilegesRequired': get_nested_value(entry, ['cve', 'metrics', 'cvssMetricV31', 0, 'cvssData', 'privilegesRequired']),
                'UserInteraction': get_nested_value(entry, ['cve', 'metrics', 'cvssMetricV31', 0, 'cvssData', 'userInteraction']),
                'Scope': get_nested_value(entry, ['cve', 'metrics', 'cvssMetricV31', 0, 'cvssData', 'scope']),
                'ConfidentialityImpact': get_nested_value(entry, ['cve', 'metrics', 'cvssMetricV31', 0, 'cvssData', 'confidentialityImpact']),
                'IntegrityImpact': get_nested_value(entry, ['cve', 'metrics', 'cvssMetricV31', 0, 'cvssData', 'integrityImpact']),
                'AvailabilityImpact': get_nested_value(entry, ['cve', 'metrics', 'cvssMetricV31', 0, 'cvssData', 'availabilityImpact']),
                'BaseScore': get_nested_value(entry, ['cve', 'metrics', 'cvssMetricV31', 0, 'cvssData', 'baseScore'], '0.0'),
                'BaseSeverity': get_nested_value(entry, ['cve', 'metrics', 'cvssMetricV31', 0, 'cvssData', 'baseSeverity']),
                'ExploitabilityScore': get_nested_value(entry, ['cve', 'metrics', 'cvssMetricV31', 0, 'exploitabilityScore']),
                'ImpactScore': get_nested_value(entry, ['cve', 'metrics', 'cvssMetricV31', 0, 'impactScore']),
                'CWE': get_nested_value(entry, ['cve', 'weaknesses', 0, 'description', 0, 'value']),
                'Description': get_nested_value(entry, ['cve', 'descriptions', 0, 'value'], ''),
                'Assigner': get_nested_value(entry, ['cve', 'sourceIdentifier']),
                'Tag': get_nested_value(entry, ['cve', 'cveTags', 0, 'tags'], np.nan),
                'Status': get_nested_value(entry, ['cve', 'vulnStatus'], '')
            }
            row_accumulator.append(new_row)

nvd = pd.DataFrame(row_accumulator)
d = nvd[~nvd.Status.str.contains('Rejected')]
nvd['Published'] = pd.to_datetime(nvd['Published'])
thisyear = ((nvd['Published'] > '2025-01-01') & (nvd['Published']  < '2025-3-01'))
lastyear = ((nvd['Published'] > '2024-01-01') & (nvd['Published']  < '2024-3-01'))

nvd['BaseScore'] = pd.to_numeric(nvd['BaseScore']);
nvd['BaseScore'] = pd.to_numeric(nvd['BaseScore']);
nvd['BaseScore'] = nvd['BaseScore'].replace(0, np.nan);


nvd_2025 = nvd.loc[thisyear]
nvd_2025 = nvd_2025.sort_values(by=['Published'])
nvd_2024 = nvd_2025.reset_index(drop=True)
nvdcount_2025 = nvd_2025['Published'].count()
startdate_2025 = date(2025, 1, 1)
enddate_2025  = date(2025, 2, 1)
numberofdays = enddate_2025 - startdate_2025 
per_day_2025 = nvdcount_2025/numberofdays.days


nvd_2024 = nvd.loc[lastyear]
nvd_2024 = nvd_2024.sort_values(by=['Published'])
nvd_2024 = nvd_2024.reset_index(drop=True)
nvdcount_2024 = nvd_2024['Published'].count()
startdate_2024 = date(2024, 1, 1)
enddate_2024  = date(2024, 2, 1)
numberofdays = enddate_2024 - startdate_2024 
per_day_2024 = nvdcount_2024/numberofdays.days



In [4]:
Markdown(f"2025<br />Total Number of CVEs: **{nvd_2025['CVE'].count()}**<br />Average CVEs Per Day: **{per_day_2025.round(2)}**<br />Average CVSS Score: **{nvd_2025['BaseScore'].mean().round(2)}**")

2025<br />Total Number of CVEs: **8246**<br />Average CVEs Per Day: **266.0**<br />Average CVSS Score: **6.58**

In [5]:
Markdown(f"2024<br />Total Number of CVEs: **{nvd_2024['CVE'].count()}**<br />Average CVEs Per Day: **{per_day_2024.round(2)}**<br />Average CVSS Score: **{nvd_2024['BaseScore'].mean().round(2)}**")

2024<br />Total Number of CVEs: **5553**<br />Average CVEs Per Day: **179.13**<br />Average CVSS Score: **6.7**