Skip to content
This repository has been archived by the owner on Feb 2, 2023. It is now read-only.

Commit

Permalink
Output separate YAML files for cisagov conversion
Browse files Browse the repository at this point in the history
Instead of printing one large YAML file to stdout, we group the
read software entries by the uppercased first character of the vendor
name and output a file for each of these groups. All non-alphabetic
vendor entries are grouped in the "0-9" group manually. During testing,
the only non-alphabetic entries were numeric. This will generate
files that are small enough to be edited using the GitHub web UI.
  • Loading branch information
mcdonnnj committed Jan 4, 2022
1 parent 8dc74ed commit 5d33941
Showing 1 changed file with 27 additions and 9 deletions.
36 changes: 27 additions & 9 deletions src/mdyml/convert_cisagov.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
# Standard Python Libraries
from datetime import datetime, timezone
import html
from itertools import groupby
import logging
import sys
from typing import Any
Expand All @@ -36,6 +37,7 @@
from . import DEFAULT_CVE_ID, MD_LINK_RE, ORDERED_CVE_IDS, __version__

RAW_URL = "https://raw.githubusercontent.com/cisagov/log4j-affected-db/develop/SOFTWARE-LIST.md"
SOFTWARE_LIST_FILE_FORMAT = "cisagov_{}.yml"

EXPECTED_COLUMN_NAMES = [
"vendor",
Expand Down Expand Up @@ -160,15 +162,31 @@ def convert() -> None:

out_dict_list.append(out_dict)

doc = {"version": "1.0", "software": out_dict_list}

yaml = ruamel.yaml.YAML()
yaml.indent(mapping=2, offset=2, sequence=4)
yaml.explicit_start = True
yaml.explicit_end = True
yaml.sort_base_mapping_type_on_output = False
yaml.allow_unicode = True
yaml.dump(doc, sys.stdout)
out_dict_groups = {
k: list(g)
for k, g in groupby(out_dict_list, key=lambda s: s["vendor"][0].upper())
}

non_letter_groups = list()
for key in list(out_dict_groups.keys()):
if not key.isalpha():
non_letter_groups.extend(out_dict_groups[key])
del out_dict_groups[key]
out_dict_groups["0-9"] = non_letter_groups

for key, data in out_dict_groups.items():
filename = SOFTWARE_LIST_FILE_FORMAT.format(key)
logging.debug("Writing data for '%s' to '%s'", key, filename)
with open(filename, "w") as out_file:
doc = {"version": "1.0", "software": data}

yaml = ruamel.yaml.YAML()
yaml.indent(mapping=2, offset=2, sequence=4)
yaml.explicit_start = True
yaml.explicit_end = True
yaml.sort_base_mapping_type_on_output = False
yaml.allow_unicode = True
yaml.dump(doc, out_file)


def main() -> None:
Expand Down

0 comments on commit 5d33941

Please sign in to comment.