In [7]:
import json
import numpy as np
import pandas as pd
import os
from json_schema_for_humans.generate import generate_from_filename
from json_schema_for_humans.generation_configuration import GenerationConfiguration

In [11]:
def extract_keys(schema, parent_key=''):
    """Flatten a mapping of schema entries into dotted-path rows.

    Args:
        schema: Mapping of entry name -> entry value. Dict values are read for
            their 'description', 'type', 'format' and 'pattern' entries.
        parent_key: Dotted path of the enclosing entry; empty at the top level.

    Returns:
        A list of ``(key, description, type, format, pattern)`` tuples in
        pre-order: each entry is followed immediately by the rows produced
        from its own 'properties' mapping, if it has one. Attributes missing
        from an entry, and all attributes of non-dict entries, are ''.
    """
    attribute_names = ('description', 'type', 'format', 'pattern')
    rows = []
    for name, node in schema.items():
        path = name if not parent_key else f'{parent_key}.{name}'

        # Non-dict values carry no metadata; record the path alone.
        if not isinstance(node, dict):
            rows.append((path, '', '', '', ''))
            continue

        rows.append((path, *(node.get(attr, '') for attr in attribute_names)))

        # Descend only through an explicit 'properties' mapping.
        if 'properties' in node:
            rows += extract_keys(node['properties'], path)
    return rows

# Load the schema from the JSON file.
# The encoding is given explicitly so the read does not depend on the
# platform's locale default (which is not UTF-8 on every system).
with open('../Data/Schemas/CVE_Record_Format.json', encoding='utf-8') as f:
    schema = json.load(f)

# Extract keys from the definitions section of the schema.
# A schema without a 'definitions' section yields an empty table rather than an error.
definitions = schema.get('definitions', {})
keys = extract_keys(definitions)
df_keys = pd.DataFrame(keys, columns=['Key', 'Description', 'Type', 'Format', 'Pattern'])

# Sort the DataFrame by the 'Key' column (returns a new frame; df_keys keeps extraction order)
df_keys_sorted = df_keys.sort_values(by='Key')

# Create the output directory if it doesn't exist
output_dir = '../CVE Program/files'
os.makedirs(output_dir, exist_ok=True)

# Export the sorted DataFrame to a CSV file (index omitted from the file)
output_file = os.path.join(output_dir, 'CVE_Schema.csv')
df_keys_sorted.to_csv(output_file, index=False)

df_keys_sorted

Unnamed: 0,Key,Description,Type,Format,Pattern
96,adpContainer,An object containing the vulnerability informa...,object,,
101,adpContainer.affected,,,,
107,adpContainer.configurations,,,,
102,adpContainer.cpeApplicability,,array,,
112,adpContainer.credits,,,,
...,...,...,...,...,...
0,uriType,"A universal resource identifier (URI), accordi...",string,uri,
10,userId,A UUID for a user participating in the CVE pro...,,,
1,uuidType,A version 4 (random) universally unique identi...,string,,^[0-9A-Fa-f]{8}-[0-9A-Fa-f]{4}-4[0-9A-Fa-f]{3}...
14,version,"A single version of a product, as expressed in...",string,,


In [12]:
# Turn empty strings into NaN so they count as missing.
# This modifies df_keys itself; df_keys_sorted is a separate frame and is untouched.
df_keys.replace(to_replace='', value=np.nan, inplace=True)

# Share of non-missing values per column, expressed as a percentage
percentages = df_keys.notna().mean().mul(100)

# Two-column summary table: column name and its fill percentage
percentages_df = percentages.rename_axis('Column').reset_index(name='Percentage')
percentages_df

Unnamed: 0,Column,Percentage
0,Key,100.0
1,Description,62.411348
2,Type,50.35461
3,Format,1.41844
4,Pattern,7.092199


In [16]:
# Render the CVE record schema as Markdown documentation.
# Options are passed straight to json-schema-for-humans; "md_nested" selects
# its nested Markdown template.
config = GenerationConfiguration(
    template_name="md_nested",
    deprecated_from_description=True,
    footer_show_time=False,
)

# Make sure the destination folder exists before writing, so this cell also
# runs on a fresh checkout where "Files" has not been created yet.
# NOTE(review): assumes the generator does not create parent directories itself — confirm.
os.makedirs("Files", exist_ok=True)

generate_from_filename("../Data/Schemas/CVE_Record_Format.json", "Files/cve_schema.md", config=config)

== Generating cve_schema.md ==
== Generated cve_schema.md in 0:00:00.123435 ==
