# NVD Program Cost

The contract period analyzed in this project spans from **November 30, 2023**, to **November 30, 2023**, During this period, Analygence will receive upto **$125,000,000** from NIST to enrich CVE Data with CVSS, CPE and CWE data. 

[Contract Link](https://www.highergov.com/idv/1333ND24DNB770002/)

In [1]:
import os
import json
import pandas as pd
import matplotlib.pyplot as plt
import re

In [2]:
contract_value = 25000000

## Load and Parse CVE Data

In [3]:
all_rows = []
base_dir = "CVE/cves"

# Loop through each year directory
for year_dir in os.listdir(base_dir):
    if year_dir.isdigit():
        year_path = os.path.join(base_dir, year_dir)
        if os.path.isdir(year_path):
            for root, dirs, files in os.walk(year_path):
                for filename in files:
                    if filename.endswith(".json"):
                        filepath = os.path.join(root, filename)
                        try:
                            with open(filepath, "r") as file:
                                cve_data = json.load(file)

                                meta = cve_data.get("cveMetadata", {})
                                containers = cve_data.get("containers", {})
                                cna = containers.get("cna", {})

                                cve_id = meta.get("cveId", None)
                                state = meta.get("state", None)
                                assigner_short = meta.get("assignerShortName", None)
                                date_reserved = meta.get("dateReserved", None)
                                date_published = meta.get("datePublished", None)
                                date_updated = meta.get("dateUpdated", None)
                                
                                desc_en = next(
                                    (d.get("value") for d in cna.get("descriptions", []) if d.get("lang") == "en"),
                                    None
                                )

                                # Collect any CVSS base scores by version
                                cvss_scores_v3 = [
                                    metric["cvssV3_1"]["baseScore"]
                                    for metric in cna.get("metrics", [])
                                    if "cvssV3_1" in metric
                                ]
                                cvss_scores_v4 = [
                                    metric["cvssV4_0"]["baseScore"]
                                    for metric in cna.get("metrics", [])
                                    if "cvssV4_0" in metric
                                ]
                                cvss_scores_v2 = [
                                    metric["cvssV2_0"]["baseScore"]
                                    for metric in cna.get("metrics", [])
                                    if "cvssV2_0" in metric
                                ]

                                # Only take the first score if available
                                cvss_score_v3_1 = cvss_scores_v3[0] if cvss_scores_v3 else None
                                cvss_score_v4_0 = cvss_scores_v4[0] if cvss_scores_v4 else None
                                cvss_score_v2_0 = cvss_scores_v2[0] if cvss_scores_v2 else None

                                # Only take the first CWE if present
                                cwe_ids = []
                                for problem_type in cna.get("problemTypes", []):
                                    for desc in problem_type.get("descriptions", []):
                                        if "cweId" in desc:
                                            cwe_ids.append(desc["cweId"])
                                cwe_id = cwe_ids[0] if cwe_ids else None
                                
                                provider_meta = cna.get("providerMetadata", {})
                                cna_short_name = provider_meta.get("shortName", None)

                                # Collect affected products and versions
                                affected_products = [
                                    f"{aff.get('vendor', 'n/a')} {aff.get('product', 'n/a')} {ver.get('version', 'n/a')}"
                                    for aff in cna.get("affected", [])
                                    for ver in aff.get("versions", [])
                                ]

                                # Collect credits
                                credits = [
                                    f"{credit['value']} ({credit.get('type', 'unknown')})"
                                    for credit in cna.get("credits", [])
                                ]

                                # Collect impacts
                                impacts = [
                                    impact.get("capecId", "unknown")
                                    for impact in cna.get("impacts", [])
                                ]

                                # Collect references
                                references = [
                                    ref.get("url", "unknown")
                                    for ref in cna.get("references", [])
                                ]

                                all_rows.append({
                                    "CVE ID": cve_id,
                                    "State": state,
                                    "Assigner Org": assigner_short,
                                    "Date Reserved": date_reserved,
                                    "Date Published": date_published,
                                    "Date Updated": date_updated,
                                    "CVE Description": desc_en,
                                    "CVSS Score (v3.1)": cvss_score_v3_1,
                                    "CVSS Score (v4.0)": cvss_score_v4_0,
                                    "CVSS Score (v2.0)": cvss_score_v2_0,
                                    "CWE ID": cwe_id,
                                    "CNA Short Name": cna_short_name,
                                    "Affected Products": affected_products,
                                    "Credits": credits,
                                    "Impacts": impacts,
                                    "References": references
                                })
                        except Exception as e:
                            print(f"Error processing file {filepath}: {e}")

cve_df = pd.DataFrame(all_rows)

In [4]:
cve_df = cve_df[['CVE ID', 'Date Published', 'CVE Description', 'CNA Short Name']]
cve_df.loc[:, 'Date Published'] = pd.to_datetime(cve_df['Date Published'], format='mixed', utc=True)

In [5]:
# Define the date range
start_date = pd.Timestamp("1900-01-01", tz="UTC")
end_date = pd.Timestamp("2025-04-16", tz="UTC")

# Filter the DataFrame
cve_df_all_records = cve_df[(cve_df['Date Published'] >= start_date) & (cve_df['Date Published'] <= end_date)]

In [6]:
contract_start_date = pd.Timestamp("2024-04-17", tz="UTC")
contract_end_date = pd.Timestamp("2025-04-16", tz="UTC")

cve_df_contract_period = cve_df[(cve_df['Date Published'] >= contract_start_date) & (cve_df['Date Published'] <= contract_end_date)]

In [None]:
cve_count_contract = len(cve_df_contract_period)
cve_contract_cost = contract_value / cve_count_contract 
cve_contract_cost = round(cve_contract_cost, 2)
print(f"Analygence contract value was ${cve_contract_cost:,.2f} for each of the {cve_count_contract:,} CVEs published during the contract period.")

Analygence received $573.07 for each of the 43,625 CVEs published during the contract period.


In [None]:
cve_count_all = len(cve_df_all_records)
cve_total_cost = contract_value / cve_count_all 
cve_total_cost = round(cve_total_cost, 2)
print(f"Analygence contract value was ${cve_total_cost:,.2f} for each of the {cve_count_all:,} CVEs in the CVE database during the contract period.")

Analygence received $87.19 for each of the 286,729 CVEs in the CVE database during the contract period.


In [16]:

cna_counts_df = cve_df_all_records['CNA Short Name'].value_counts().reset_index()
cna_counts_df.columns = ['CNA Short Name', 'Count']
cna_counts_df['Cost'] = cna_counts_df['Count'] * cve_total_cost
cna_counts_df['CNA Short Name'] = cna_counts_df['CNA Short Name'].str.title()
cna_counts_df['Cost'] = cna_counts_df['Cost'].apply(lambda x: f"${x:,.2f}")
cna_counts_df.to_csv('nvd_program_contract_total_value.csv', index=False)
cna_counts_df

Unnamed: 0,CNA Short Name,Count,Cost
0,Mitre,109619,"$9,557,680.61"
1,Redhat,11653,"$1,016,025.07"
2,Microsoft,11631,"$1,014,106.89"
3,Patchstack,10008,"$872,597.52"
4,Oracle,8201,"$715,045.19"
...,...,...,...
386,Seal,1,$87.19
387,Tqtc,1,$87.19
388,Roche,1,$87.19
389,Illumio,1,$87.19


In [17]:

cna_counts_df_contract_period = cve_df_contract_period['CNA Short Name'].value_counts().reset_index()
cna_counts_df_contract_period.columns = ['CNA Short Name', 'Count']
cna_counts_df_contract_period['Cost'] = cna_counts_df_contract_period['Count'] * cve_contract_cost
cna_counts_df_contract_period['CNA Short Name'] = cna_counts_df_contract_period['CNA Short Name'].str.title()
cna_counts_df_contract_period['Cost'] = cna_counts_df_contract_period['Cost'].apply(lambda x: f"${x:,.2f}")
cna_counts_df_contract_period.to_csv('nvd_program_contract_period_value.csv', index=False)
cna_counts_df_contract_period

Unnamed: 0,CNA Short Name,Count,Cost
0,Patchstack,6441,"$3,691,143.87"
1,Mitre,5880,"$3,369,651.60"
2,Linux,5196,"$2,977,671.72"
3,Wordfence,3886,"$2,226,950.02"
4,Vuldb,3144,"$1,801,732.08"
...,...,...,...
316,Tigera,1,$573.07
317,Krcert,1,$573.07
318,Softiron,1,$573.07
319,Roche,1,$573.07
