In [None]:
import json
import numpy as np
import pandas as pd
import os
from json_schema_for_humans.generate import generate_from_filename
from json_schema_for_humans.generation_configuration import GenerationConfiguration

In [6]:


def _schema_keyword(node, name, default, allowed_types):
    """Return ``node[name]`` only when it looks like a real keyword value.

    A ``properties``-style mapping may contain a *property* that is literally
    named ``type`` / ``format`` / ``pattern`` / ``description``; its value is
    then a nested schema dict rather than a keyword value. Such values are
    rejected in favour of ``default`` so they do not leak into the metadata.
    """
    found = node.get(name, default)
    return found if isinstance(found, allowed_types) else default


def list_keys(schema, parent_key=''):
    """Flatten a nested schema mapping into a list of dotted-key rows.

    Every key encountered while walking ``schema`` yields one tuple
    ``(full_key, type, format, pattern, description)``, where ``full_key`` is
    the dot-joined path from the root of the walk.

    * dict value: ``type`` (default ``'object'``), ``format``, ``pattern`` and
      ``description`` (default ``''``) are read from the dict, then the dict
      itself is walked recursively.
    * list value: reported with type ``'array'``; each element is walked under
      the same path (elements that are not dicts contribute no rows).
    * any other value: reported with type ``'string'`` and empty metadata.

    Args:
        schema: Mapping to walk. Any non-dict input produces no rows.
        parent_key: Dotted path prefix applied to keys at this level.

    Returns:
        list of 5-tuples, in depth-first order following dict insertion order.
    """
    keys = []
    if not isinstance(schema, dict):
        return keys

    for key, value in schema.items():
        full_key = f"{parent_key}.{key}" if parent_key else key
        if isinstance(value, dict):
            # ``type`` may legitimately be a single name or a list of names;
            # the remaining keywords are plain strings.
            key_type = _schema_keyword(value, 'type', 'object', (str, list))
            key_format = _schema_keyword(value, 'format', '', str)
            key_pattern = _schema_keyword(value, 'pattern', '', str)
            key_description = _schema_keyword(value, 'description', '', str)
            keys.append((full_key, key_type, key_format, key_pattern, key_description))
            keys.extend(list_keys(value, full_key))
        elif isinstance(value, list):
            keys.append((full_key, 'array', '', '', ''))
            for item in value:
                # Children of list elements share the list's own path.
                keys.extend(list_keys(item, full_key))
        else:
            # Scalar leaves are always labelled 'string'.
            keys.append((full_key, 'string', '', '', ''))
    return keys

# Load the JSON schema. JSON text is UTF-8 by specification, so decode it
# explicitly rather than relying on the platform's default locale encoding.
with open('../Data/Schemas/cve_api_json_2.0.schema', 'r', encoding='utf-8') as file:
    schema = json.load(file)

# Flatten everything under the top-level "definitions" section into rows of
# (key path, type, format, pattern, description).
definitions_keys = list_keys(schema.get('definitions', {}))

# Build the table, cast every cell to str (some cells may hold non-string
# values such as lists or dicts), then order rows by key path and type.
df = (
    pd.DataFrame(
        definitions_keys,
        columns=['Keys', 'Type', 'Format', 'Pattern', 'Description'],
    )
    .astype(str)
    .sort_values(by=['Keys', 'Type'])
)

# Make sure the output directory exists before writing into it.
output_dir = '../NVD Program/Files'
os.makedirs(output_dir, exist_ok=True)

# Export the sorted table to CSV (without the index column) and display it.
output_file = os.path.join(output_dir, 'NVD_Schema.csv')
df.to_csv(output_file, index=False)
df

Unnamed: 0,Keys,Type,Format,Pattern,Description
212,config,object,,,
213,config.properties,object,,,
217,config.properties.negate,boolean,,,
218,config.properties.negate.type,string,,,
219,config.properties.nodes,array,,,
...,...,...,...,...,...
201,weakness.properties.source,string,,,
202,weakness.properties.source.type,string,,,
203,weakness.properties.type,string,,,
204,weakness.properties.type.type,string,,,


In [4]:

# Treat empty strings as missing values.
# NOTE: this mutates `df` in place, so cells run afterwards see NaN instead of ''.
df.replace('', np.nan, inplace=True)

# Share of non-missing cells per column, expressed as a percentage.
percentages = df.notna().mean().mul(100)

# Turn the Series into a two-column table: column name and its fill rate.
percentages_df = percentages.rename_axis('Column').reset_index(name='Percentage')

# Show the fill-rate table.
percentages_df

Unnamed: 0,Column,Percentage
0,Keys,100.0
1,Type,100.0
2,Format,2.290076
3,Pattern,0.763359
4,Description,3.816794


In [None]:
# Rendering options for json-schema-for-humans.
# NOTE(review): the exact meaning of each option is defined by the library —
# confirm against its GenerationConfiguration documentation.
config = GenerationConfiguration(
    template_name="md_nested",
    deprecated_from_description=True,
    footer_show_time=False,
)

# Generate documentation for the CVE API schema file.
# The output path is relative to the current working directory.
generate_from_filename(
    "../Data/Schemas/cve_api_json_2.0.schema",
    "Files/nvd_schema.md",
    config=config,
)

== Generating nvd_schema.md ==
== Generated nvd_schema.md in 0:00:01.000555 ==
