From e6ba4c43fe40f7e4cd5dfd09568cacc12281e3ca Mon Sep 17 00:00:00 2001 From: Eric Beahan Date: Wed, 23 Sep 2020 14:18:43 -0500 Subject: [PATCH] [1.x] Introduce `--strict` flag (#937) (#975) --- CHANGELOG.next.md | 5 ++- Makefile | 2 +- USAGE.md | 45 +++++++++++++++++++++++ scripts/generator.py | 4 +- scripts/generators/ecs_helpers.py | 15 ++++++++ scripts/schema/cleaner.py | 18 ++++++--- scripts/tests/unit/test_schema_cleaner.py | 20 ++++++++++ 7 files changed, 99 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.next.md b/CHANGELOG.next.md index 4f624151f1..1984f60434 100644 --- a/CHANGELOG.next.md +++ b/CHANGELOG.next.md @@ -10,8 +10,6 @@ Thanks, you're awesome :-) --> ### Schema Changes -* Added `threat.technique.subtechnique` to capture MITRE ATT&CK® subtecqhniques. #951 - #### Breaking changes #### Bugfixes @@ -21,6 +19,7 @@ Thanks, you're awesome :-) --> #### Added * Added Mime Type fields to HTTP request and response. #944 +* Added `threat.technique.subtechnique` to capture MITRE ATT&CK® subtechniques. #951 #### Improvements @@ -38,6 +37,8 @@ Thanks, you're awesome :-) --> #### Added +* Introduced `--strict` flag to perform stricter schema validation when running the generator script. #937 + #### Improvements * Field details Jinja2 template components have been consolidated into one template #897 diff --git a/Makefile b/Makefile index e2826b46bc..37617d391e 100644 --- a/Makefile +++ b/Makefile @@ -61,7 +61,7 @@ generate: legacy_use_cases codegen generator # Run the new generator .PHONY: generator generator: - $(PYTHON) scripts/generator.py --include "${INCLUDE}" + $(PYTHON) scripts/generator.py --strict --include "${INCLUDE}" # Generate Go code from the schema. .PHONY: gocodegen diff --git a/USAGE.md b/USAGE.md index 334879892e..ffa2ca2e06 100644 --- a/USAGE.md +++ b/USAGE.md @@ -29,6 +29,7 @@ relevant artifacts for their unique set of data sources. + [Subset](#subset) + [Ref](#ref) + [Mapping & Template Settings](#mapping--template-settings) + + [Strict Mode](#strict-mode) + [Intermediate-Only](#intermediate-only) ## Terminology @@ -294,6 +295,50 @@ The `--template-settings` argument defines [index level settings](https://www.el For `template.json`, the `mappings` object is left empty: `{}`. Likewise the `properties` object remains empty in the `mapping.json` example. This will be filled in automatically by the script. +#### Strict Mode + +The `--strict` argument enables "strict mode". Strict mode performs a stricter validation step against the schema's contents. + +Basic usage: + +``` +$ python/generator.py --strict +``` + +Strict mode requires the following conditions, else the script exits on an exception: + +* Short descriptions must be less than or equal to 120 characters. + +The current artifacts generated and published in the ECS repo will always be created using strict mode. However, older ECS versions (pre `v1.5.0`) will cause +an exception if attempting to generate them using `--strict`. This is due to schema validation checks introduced after that version was released. + +Example: + +``` +$ python scripts/generator.py --ref v1.4.0 --strict +Loading schemas from git ref v1.4.0 +Running generator. ECS version 1.4.0 +... +ValueError: Short descriptions must be single line, and under 120 characters (current length: 134). +Offending field or field set: number +Short description: + Unique number allocated to the autonomous system. The autonomous system number (ASN) uniquely identifies each network on the Internet. +``` + +Removing `--strict` will display a warning message, but the script will finish its run successfully: + +``` +$ python scripts/generator.py --ref v1.4.0 +Loading schemas from git ref v1.4.0 +Running generator. ECS version 1.4.0 +/Users/ericbeahan/dev/ecs/scripts/generators/ecs_helpers.py:176: UserWarning: Short descriptions must be single line, and under 120 characters (current length: 134). +Offending field or field set: number +Short description: + Unique number allocated to the autonomous system. The autonomous system number (ASN) uniquely identifies each network on the Internet. + +This will cause an exception when running in strict mode. +``` + #### Intermediate-Only The `--intermediate-only` argument is used for debugging purposes. It only generates the ["intermediate files"](generated/ecs), `ecs_flat.yml` and `ecs_nested.yml`, without generating the rest of the artifacts. diff --git a/scripts/generator.py b/scripts/generator.py index b7ae2a4b2f..733f4155fe 100644 --- a/scripts/generator.py +++ b/scripts/generator.py @@ -41,7 +41,7 @@ def main(): # ecs_helpers.yaml_dump('ecs.yml', fields) fields = loader.load_schemas(ref=args.ref, included_files=args.include) - cleaner.clean(fields) + cleaner.clean(fields, strict=args.strict) finalizer.finalize(fields) fields = subset_filter.filter(fields, args.subset, out_dir) nested, flat = intermediate_files.generate(fields, os.path.join(out_dir, 'ecs'), default_dirs) @@ -72,6 +72,8 @@ def argument_parser(): help='index template settings to use when generating elasticsearch template') parser.add_argument('--mapping-settings', action='store', help='mapping settings to use when generating elasticsearch template') + parser.add_argument('--strict', action='store_true', + help='enforce stricter checking at schema cleanup') args = parser.parse_args() # Clean up empty include of the Makefile if args.include and [''] == args.include: diff --git a/scripts/generators/ecs_helpers.py b/scripts/generators/ecs_helpers.py index 911a3c9968..275c0569ac 100644 --- a/scripts/generators/ecs_helpers.py +++ b/scripts/generators/ecs_helpers.py @@ -2,6 +2,7 @@ import os import yaml import git +import warnings from collections import OrderedDict from copy import deepcopy @@ -159,3 +160,17 @@ def list_extract_keys(lst, key_name): def is_intermediate(field): '''Encapsulates the check to see if a field is an intermediate field or a "real" field.''' return ('intermediate' in field['field_details'] and field['field_details']['intermediate']) + + +# Warning helper + + +def strict_warning(msg): + """Call warnings.warn(msg) for operations that would throw an Exception + if operating in `--strict` mode. Allows a custom message to be passed. + + :param msg: custom text which will be displayed with wrapped boilerplate + for strict warning messages. + """ + warn_message = f"{msg}\n\nThis will cause an exception when running in strict mode." + warnings.warn(warn_message) diff --git a/scripts/schema/cleaner.py b/scripts/schema/cleaner.py index ec48598bea..5f62b2daac 100644 --- a/scripts/schema/cleaner.py +++ b/scripts/schema/cleaner.py @@ -19,7 +19,9 @@ # deal with final field names either. -def clean(fields): +def clean(fields, strict=False): + global strict_mode + strict_mode = strict visitor.visit_fields(fields, fieldset_func=schema_cleanup, field_func=field_cleanup) @@ -46,7 +48,7 @@ def schema_cleanup(schema): else: schema['schema_details']['prefix'] = schema['field_details']['name'] + '.' normalize_reuse_notation(schema) - # Final validity check + # Final validity check if in strict mode schema_assertions_and_warnings(schema) @@ -73,7 +75,7 @@ def schema_mandatory_attributes(schema): def schema_assertions_and_warnings(schema): '''Additional checks on a fleshed out schema''' - single_line_short_description(schema) + single_line_short_description(schema, strict=strict_mode) def normalize_reuse_notation(schema): @@ -165,7 +167,8 @@ def field_mandatory_attributes(field): def field_assertions_and_warnings(field): '''Additional checks on a fleshed out field''' if not ecs_helpers.is_intermediate(field): - single_line_short_description(field) + # check short description length if in strict mode + single_line_short_description(field, strict=strict_mode) if field['field_details']['level'] not in ACCEPTABLE_FIELD_LEVELS: msg = "Invalid level for field '{}'.\nValue: {}\nAcceptable values: {}".format( field['field_details']['name'], field['field_details']['level'], @@ -178,7 +181,7 @@ def field_assertions_and_warnings(field): SHORT_LIMIT = 120 -def single_line_short_description(schema_or_field): +def single_line_short_description(schema_or_field, strict=True): short_length = len(schema_or_field['field_details']['short']) if "\n" in schema_or_field['field_details']['short'] or short_length > SHORT_LIMIT: msg = "Short descriptions must be single line, and under {} characters (current length: {}).\n".format( @@ -186,4 +189,7 @@ def single_line_short_description(schema_or_field): msg += "Offending field or field set: {}\nShort description:\n {}".format( schema_or_field['field_details']['name'], schema_or_field['field_details']['short']) - raise ValueError(msg) + if strict: + raise ValueError(msg) + else: + ecs_helpers.strict_warning(msg) diff --git a/scripts/tests/unit/test_schema_cleaner.py b/scripts/tests/unit/test_schema_cleaner.py index ed82218706..4c20fac01f 100644 --- a/scripts/tests/unit/test_schema_cleaner.py +++ b/scripts/tests/unit/test_schema_cleaner.py @@ -262,6 +262,26 @@ def test_multiline_short_description_raises(self): with self.assertRaisesRegex(ValueError, 'single line'): cleaner.single_line_short_description(schema) + def test_very_long_short_description_warns_strict_disabled(self): + schema = {'field_details': { + 'name': 'fake_schema', + 'short': "Single line but really long. " * 10}} + try: + with self.assertWarnsRegex(UserWarning, 'under 120 characters \(current length: 290\)'): + cleaner.single_line_short_description(schema, strict=False) + except Exception: + self.fail("cleaner.single_line_short_description() raised Exception unexpectedly.") + + def test_multiline_short_description_warns_strict_disabled(self): + schema = {'field_details': { + 'name': 'fake_schema', + 'short': "multiple\nlines"}} + try: + with self.assertWarnsRegex(UserWarning, 'single line'): + cleaner.single_line_short_description(schema, strict=False) + except Exception: + self.fail("cleaner.single_line_short_description() raised Exception unexpectedly.") + def test_clean(self): '''A high level sanity test''' fields = self.schema_process()