Skip to content
This repository has been archived by the owner on Feb 2, 2023. It is now read-only.

Commit

Permalink
Merge pull request #7 from cisagov/v1.1.0
Browse files Browse the repository at this point in the history
v1.1.0 update
  • Loading branch information
mcdonnnj committed Jan 11, 2022
2 parents f8f9af2 + fd51f18 commit 09f5540
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 24 deletions.
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ The common YAML format looks like this:
```yaml
---
version: '1.0'
owners:
- name: cisagov
url: https://github.com/cisagov/log4j-affected-db
software:
- cves:
- affected_versions:
Expand All @@ -37,15 +40,14 @@ software:
- 1.2
investigated: true
unaffected_versions: []
last_updated: "2021-12-09"
notes: Blah blah blah
product: ProductA
references:
- https://www.reddit.com/r/Vendor1/comments/abcdef/log4j
reporter: cisagov
vendor: Vendor1
vendor_links:
- https://vendor1.com/discussion/comment/622612/#Comment_622612
last_updated: "2021-12-09"
...
```
Expand Down
2 changes: 1 addition & 1 deletion src/_log4j_md_yml_version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
"""This file defines the version of this module."""
# Single source of truth for the package version.
__version__ = "1.1.0"
44 changes: 35 additions & 9 deletions src/mdyml/convert_cisagov.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
# Standard Python Libraries
from datetime import datetime, timezone
import html
from itertools import groupby
import logging
import sys
from typing import Any
Expand All @@ -36,6 +37,7 @@
from . import DEFAULT_CVE_ID, MD_LINK_RE, ORDERED_CVE_IDS, __version__

RAW_URL = "https://raw.githubusercontent.com/cisagov/log4j-affected-db/develop/SOFTWARE-LIST.md"
SOFTWARE_LIST_FILE_FORMAT = "cisagov_{}.yml"

EXPECTED_COLUMN_NAMES = [
"vendor",
Expand Down Expand Up @@ -145,7 +147,6 @@ def convert() -> None:

out_dict["notes"] = in_row_dict["notes"]
out_dict["references"] = [in_row_dict["references"]]
out_dict["reporter"] = "cisagov"
# Parse the existing date, or not
if parsed_date := dateparser.parse(in_row_dict["last_updated"]):
# Check if parsed date has a timezone
Expand All @@ -160,15 +161,40 @@ def convert() -> None:

out_dict_list.append(out_dict)

doc = {"version": "1.0", "software": out_dict_list}
out_dict_groups = {
k: list(g)
for k, g in groupby(out_dict_list, key=lambda s: s["vendor"][0].upper())
}

non_letter_groups = list()
for key in list(out_dict_groups.keys()):
if not key.isalpha():
non_letter_groups.extend(out_dict_groups[key])
del out_dict_groups[key]
out_dict_groups["Non-Alphabet"] = non_letter_groups

for key, data in out_dict_groups.items():
filename = SOFTWARE_LIST_FILE_FORMAT.format(key)
logging.debug("Writing data for '%s' to '%s'", key, filename)
with open(filename, "w") as out_file:
doc = {
"version": "1.0",
"owners": [
{
"name": "cisagov",
"url": "https://github.com/cisagov/log4j-affected-db",
}
],
"software": data,
}

yaml = ruamel.yaml.YAML()
yaml.indent(mapping=2, offset=2, sequence=4)
yaml.explicit_start = True
yaml.explicit_end = True
yaml.sort_base_mapping_type_on_output = False
yaml.allow_unicode = True
yaml.dump(doc, sys.stdout)
yaml = ruamel.yaml.YAML()
yaml.indent(mapping=2, offset=2, sequence=4)
yaml.explicit_start = True
yaml.explicit_end = True
yaml.sort_base_mapping_type_on_output = False
yaml.allow_unicode = True
yaml.dump(doc, out_file)


def main() -> None:
Expand Down
56 changes: 45 additions & 11 deletions src/yml/normalize_yml.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@
>0 An error occurred.
Usage:
normalize-yml [--log-level=LEVEL] <yml_file>...
normalize-yml [--log-level=LEVEL] [--cisagov-format] <yml_file>...
normalize-yml (-h | --help)
Options:
-h --help Show this message.
-c --cisagov-format Generate a YAML file using the cisagov software list
format.
--log-level=LEVEL If specified, then the log level will be set to
the specified value. Valid values are "debug", "info",
"warning", "error", and "critical". [default: info]
Expand All @@ -25,33 +27,46 @@

# Third-Party Libraries
import docopt
import ruamel.yaml
from schema import And, Schema, SchemaError, Use
import yaml

from . import __version__

# A list of software entries; each entry maps a field name to its value.
Software = list[dict[str, Any]]
# A list of owner records; each record maps string keys to string values.
Owners = list[dict[str, str]]
# A loaded document: maps a node name to a software list or an owners list.
YamlData = dict[str, Software | Owners]


def munge(filenames: list[str], canonical: bool = False) -> YamlData:
    """Merge the "owners" and "software" nodes of several YAML files.

    Args:
        filenames: Paths of the YAML files to read, processed in order.
        canonical: When false (the default), every software entry gets a
            "reporter" key holding the "owners" list of the file it was
            loaded from (an empty list when the file has no "owners" node).
            When true, the software entries are left as loaded.

    Returns:
        A dictionary with two keys: "owners", the owner records of all files
        de-duplicated on their (name, url) pair, and "software", the
        concatenated software entries in input order.

    Raises:
        KeyError: If a file has no "software" node, or an owner record lacks
            "name" or "url".
    """
    software = []
    owners = []
    for filename in filenames:
        # Read as UTF-8 explicitly instead of relying on the locale encoding.
        with open(filename, "r", encoding="utf-8") as f:
            loaded_data = yaml.safe_load(f)

        if "owners" in loaded_data:
            owners.extend(loaded_data["owners"])
        if not canonical:
            # NOTE(review): all entries of one file share the same owners list
            # object; confirm the YAML dumper's handling of shared objects
            # (anchors/aliases) is acceptable for the non-canonical output.
            for product in loaded_data["software"]:
                product["reporter"] = loaded_data.get("owners", [])
        software.extend(loaded_data["software"])

    # De-duplicate owner information. A tuple key keeps name and url apart, so
    # two different owners can never collapse into one the way they could with
    # a concatenated "name + url" string.
    owners = list({(o["name"], o["url"]): o for o in owners}.values())

    return {"owners": owners, "software": software}

def normalize(data: YamlData) -> YamlData:
    """Normalize the software entries.

    This is currently a pass-through: the argument is returned unchanged (the
    very same object), so callers can already chain it between ``munge`` and
    ``sort``.
    """
    return data


def sort(data: YamlData) -> YamlData:
    """Sort the software entries in place by vendor, then by product.

    The comparison is case-insensitive. Vendor and product are compared as a
    tuple rather than as one concatenated string, so the entries of a vendor
    whose name is a prefix of another vendor's name (e.g. "AB" and "ABC")
    stay grouped together instead of being interleaved.

    Args:
        data: A document dictionary whose "software" list is sorted in place.

    Returns:
        The same ``data`` object, to allow call chaining.

    Raises:
        KeyError: If "software" is missing, or an entry lacks "vendor" or
            "product".
    """
    data["software"].sort(
        key=lambda entry: (entry["vendor"].lower(), entry["product"].lower())
    )
    return data


def main() -> None:
Expand Down Expand Up @@ -87,7 +102,26 @@ def main() -> None:
)

# Do that voodoo that you do so well...
print(yaml.dump(sort(normalize(munge(validated_args["<yml_file>"])))))
if validated_args["--cisagov-format"]:
data: YamlData = sort(
normalize(munge(validated_args["<yml_file>"], canonical=True))
)
doc = {
"version": "1.0",
"owners": data["owners"],
"software": data["software"],
}
else:
data: YamlData = sort(normalize(munge(validated_args["<yml_file>"])))
doc = data["software"]

yml = ruamel.yaml.YAML()
yml.indent(mapping=2, offset=2, sequence=4)
yml.explicit_start = True
yml.explicit_end = True
yml.sort_base_mapping_type_on_output = False
yml.allow_unicode = True
yml.dump(doc, sys.stdout)

# Stop logging and clean up
logging.shutdown()
4 changes: 3 additions & 1 deletion src/ymlmd/yml2md.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,9 @@ def generate_markdown(software: Software) -> None:
),
notes=s["notes"],
references="; ".join([x for x in s["references"] if len(x) != 0]),
reporter=s["reporter"],
reporter=", ".join(
f"[{i['name']}]({i['url']})" for i in s["reporter"]
),
# create a datetime from string and print date portion
last_updated=dateparser.parse(s["last_updated"]).strftime(
"%Y-%m-%d"
Expand Down

0 comments on commit 09f5540

Please sign in to comment.