Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding --out flag #748

Merged
merged 3 commits into from
Feb 14, 2020
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.next.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ Thanks, you're awesome :-) -->
* schema_reader.py now reliably supports chaining reusable fieldsets together. #722
* Allow the artifact generator to consider and output only a subset of fields. #737
* Add support for reusing fields in places other than the top level of the destination fieldset. #739
* Add support for specifying the directory to write the generated files. #748

#### Deprecated

Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ setup: ve

# Run the ECS tests
.PHONY: test
test:
test: ve
$(PYTHON) -m unittest discover --start-directory scripts/tests

# Create a virtualenv to run Python.
Expand Down
28 changes: 20 additions & 8 deletions scripts/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@ def main():

# Maybe load user specified directory of schemas
if args.include:
include_glob = os.path.join(args.include, '*.yml')
include_glob = schema_reader.get_glob_files(args.include, schema_reader.YAML_EXT)

print('Loading user defined schemas: {0}'.format(include_glob))

intermediate_custom = schema_reader.load_schemas(sorted(glob.glob(include_glob)))
intermediate_custom = schema_reader.load_schemas(include_glob)
schema_reader.merge_schema_fields(intermediate_fields, intermediate_custom)

if args.subset:
Expand All @@ -40,24 +40,36 @@ def main():
intermediate_fields = ecs_helpers.fields_subset(subset, intermediate_fields)

(nested, flat) = schema_reader.generate_nested_flat(intermediate_fields)
intermediate_files.generate(nested, flat)

# default location to save files
out_dir = 'generated'
docs_dir = 'docs'
if args.out:
out_dir = os.path.join(args.out, out_dir)
docs_dir = os.path.join(args.out, docs_dir)

ecs_helpers.make_dirs(out_dir)
ecs_helpers.make_dirs(docs_dir)

intermediate_files.generate(nested, flat, out_dir)
if args.intermediate_only:
exit()

csv_generator.generate(flat, ecs_version)
es_template.generate(flat, ecs_version)
beats.generate(nested, ecs_version)
asciidoc_fields.generate(nested, flat, ecs_version)
csv_generator.generate(flat, ecs_version, out_dir)
es_template.generate(flat, ecs_version, out_dir)
beats.generate(nested, ecs_version, out_dir)
asciidoc_fields.generate(nested, flat, ecs_version, docs_dir)


def argument_parser():
parser = argparse.ArgumentParser()
parser.add_argument('--intermediate-only', action='store_true',
help='generate intermediary files only')
parser.add_argument('--include', action='store',
parser.add_argument('--include', action='append',
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without the change to `append` here, if you pass in multiple --include flags it will only use the last one. With `append`, each value is added to a list instead.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually I think we should align how we do multiple values, between --subset and --include.

  • include: --include path1 --include path2
  • subset: --subset path1 path2

I have a preference for how we do --subset because it shows up clearly in the help:

usage: generator.py [-h] [--intermediate-only] [--include INCLUDE]
                    [--subset SUBSET [SUBSET ...]] [--out OUT]

optional arguments:
  -h, --help            show this help message and exit
  --intermediate-only   generate intermediary files only
  --include INCLUDE     include user specified directory of custom field
                        definitions
  --subset SUBSET [SUBSET ...]
                        render a subset of the schema
  --out OUT             directory to store the generated files

And I suspect --subset will play better with users using globs without quoting, and their shell expanding to a list of file names.

WDYT? Are there downsides to nargs='+'?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah good point, didn't think of that. I don't think there should be any issue with switching it over to nargs. I'll update it.

help='include user specified directory of custom field definitions')
parser.add_argument('--subset', nargs='+',
help='render a subset of the schema')
parser.add_argument('--out', action='store', help='directory to store the generated files')
return parser.parse_args()


Expand Down
11 changes: 5 additions & 6 deletions scripts/generators/asciidoc_fields.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
import sys

from os.path import join
from generators import ecs_helpers


def generate(ecs_nested, ecs_flat, ecs_version):
save_asciidoc('docs/fields.asciidoc', page_field_index(ecs_nested, ecs_version))
save_asciidoc('docs/field-details.asciidoc', page_field_details(ecs_nested))
save_asciidoc('docs/field-values.asciidoc', page_field_values(ecs_flat))
def generate(ecs_nested, ecs_flat, ecs_version, out_dir):
save_asciidoc(join(out_dir, 'fields.asciidoc'), page_field_index(ecs_nested, ecs_version))
save_asciidoc(join(out_dir, 'field-details.asciidoc'), page_field_details(ecs_nested))
save_asciidoc(join(out_dir, 'field-values.asciidoc'), page_field_values(ecs_flat))

# Helpers

Expand Down
12 changes: 6 additions & 6 deletions scripts/generators/beats.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import yaml

from os.path import join
from collections import OrderedDict
from generators import ecs_helpers


def generate(ecs_nested, ecs_version):
def generate(ecs_nested, ecs_version, out_dir):
# Load temporary whitelist for default_fields workaround.
df_whitelist = ecs_helpers.yaml_load('scripts/generators/beats_default_fields_whitelist.yml')

Expand All @@ -28,7 +27,7 @@ def generate(ecs_nested, ecs_version):
beats_file['description'] = 'ECS Fields.'
beats_file['fields'] = beats_fields

write_beats_yaml(beats_file, ecs_version)
write_beats_yaml(beats_file, ecs_version, out_dir)


def fieldset_field_array(source_fields, df_whitelist):
Expand Down Expand Up @@ -65,9 +64,10 @@ def fieldset_field_array(source_fields, df_whitelist):
# Helpers


def write_beats_yaml(beats_file, ecs_version):
def write_beats_yaml(beats_file, ecs_version, out_dir):
ecs_helpers.make_dirs(join(out_dir, 'beats'))
warning = file_header().format(version=ecs_version)
ecs_helpers.yaml_dump('generated/beats/fields.ecs.yml', [beats_file], preamble=warning)
ecs_helpers.yaml_dump(join(out_dir, 'beats/fields.ecs.yml'), [beats_file], preamble=warning)


# Templates
Expand Down
8 changes: 6 additions & 2 deletions scripts/generators/csv_generator.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import csv
import sys

from os.path import join
from generator import ecs_helpers

def generate(ecs_flat, version):

def generate(ecs_flat, version, out_dir):
ecs_helpers.make_dirs(join(out_dir, 'csv'))
sorted_fields = base_first(ecs_flat)
save_csv('generated/csv/fields.csv', sorted_fields, version)
save_csv(join(out_dir, 'csv/fields.csv'), sorted_fields, version)


def base_first(ecs_flat):
Expand Down
8 changes: 8 additions & 0 deletions scripts/generators/ecs_helpers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import yaml
import os

from collections import OrderedDict
from copy import deepcopy
Expand Down Expand Up @@ -96,6 +97,13 @@ def dict_rename_keys(dict, renames):

# File helpers

def make_dirs(path):
    """Create the directory `path`, including any missing parent directories.

    An already existing directory is not an error (`exist_ok=True`).

    Raises:
        OSError: when the directory cannot be created. A message is printed
            first, then the original exception is re-raised.
    """
    try:
        os.makedirs(path, exist_ok=True)
    except OSError as e:
        print('Unable to create output directory: {}'.format(e))
        # Bare `raise` re-raises the active exception with its original traceback.
        raise


def yaml_dump(filename, data, preamble=None):
with open(filename, 'w') as outfile:
Expand Down
12 changes: 7 additions & 5 deletions scripts/generators/es_template.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import json
import sys

from os.path import join
from generators import ecs_helpers


def generate(ecs_flat, ecs_version):
def generate(ecs_flat, ecs_version, out_dir):
field_mappings = {}
for flat_name in sorted(ecs_flat):
field = ecs_flat[flat_name]
Expand All @@ -14,8 +15,8 @@ def generate(ecs_flat, ecs_version):
mappings_section = mapping_settings(ecs_version)
mappings_section['properties'] = field_mappings

generate_template_version(6, mappings_section)
generate_template_version(7, mappings_section)
generate_template_version(6, mappings_section, out_dir)
generate_template_version(7, mappings_section, out_dir)

# Field mappings

Expand Down Expand Up @@ -65,14 +66,15 @@ def entry_for(field):
# Generated files


def generate_template_version(elasticsearch_version, mappings_section):
def generate_template_version(elasticsearch_version, mappings_section, out_dir):
ecs_helpers.make_dirs(join(out_dir, 'elasticsearch', str(elasticsearch_version)))
template = template_settings()
if elasticsearch_version == 6:
template['mappings'] = {'_doc': mappings_section}
else:
template['mappings'] = mappings_section

filename = "generated/elasticsearch/{}/template.json".format(elasticsearch_version)
filename = join(out_dir, "elasticsearch/{}/template.json".format(elasticsearch_version))
save_json(filename, template)


Expand Down
8 changes: 5 additions & 3 deletions scripts/generators/intermediate_files.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from generators import ecs_helpers
from os.path import join


def generate(ecs_nested, ecs_flat):
ecs_helpers.yaml_dump('generated/ecs/ecs_flat.yml', ecs_flat)
ecs_helpers.yaml_dump('generated/ecs/ecs_nested.yml', ecs_nested)
def generate(ecs_nested, ecs_flat, out_dir):
ecs_helpers.make_dirs(join(out_dir, 'ecs'))
ecs_helpers.yaml_dump(join(out_dir, 'ecs/ecs_flat.yml'), ecs_flat)
ecs_helpers.yaml_dump(join(out_dir, 'ecs/ecs_nested.yml'), ecs_nested)
11 changes: 11 additions & 0 deletions scripts/schema_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,17 @@

# File loading stuff

YAML_EXT = ('*.yml', '*.yaml')


def get_glob_files(paths, file_types):
    """Return the sorted list of files under `paths` matching `file_types`.

    Args:
        paths: a directory path, or an iterable of directory paths.
        file_types: a glob pattern (e.g. '*.yml'), or an iterable of patterns.

    Returns:
        A sorted list of matching file paths with duplicates removed, so a
        directory given twice does not yield the same file twice.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(paths, str):
        paths = [paths]
    if isinstance(file_types, str):
        file_types = [file_types]

    all_files = set()
    for path in paths:
        for pattern in file_types:
            all_files.update(glob.glob(os.path.join(path, pattern)))

    return sorted(all_files)


def ecs_files():
"""Return the schema file list to load"""
Expand Down