Skip to content

Commit

Permalink
Better handling of module data dependencies (#943)
Browse files Browse the repository at this point in the history
* Drop of work on dealing with vars in params

* Handling of module dependencies

This changes module loading to happen in a loop with determination that a module load requires unresolved parameters. This prevents unresolvable data from slipping into module data and allows better handling of module dependencies.

* Additional VBM test case

* Debugging integration test

* A little _less_ debugging data. ;-P

* Additional comment (really, just to re-kick build)
  • Loading branch information
robeden committed Mar 7, 2021
1 parent 7040a8c commit 38473ea
Show file tree
Hide file tree
Showing 9 changed files with 267 additions and 32 deletions.
138 changes: 108 additions & 30 deletions checkov/terraform/parser.py
Expand Up @@ -36,8 +36,8 @@ def __init__(self):
self._parsed_directories = set()

# This ensures that we don't try to double-load modules
# Tuple is <file>, <module_index> (see _load_modules)
self._loaded_modules: Set[Tuple[str, int]] = set()
# Tuple is <file>, <module_index>, <name> (see _load_modules)
self._loaded_modules: Set[Tuple[str, int, str]] = set()

def _init(self, directory: str, out_definitions: Optional[Dict],
out_evaluations_context: Dict[str, Dict[str, EvaluationContext]],
Expand Down Expand Up @@ -252,16 +252,39 @@ def _internal_dir_load(self, directory: str,
self._process_vars_and_locals(directory, var_value_and_file_map, module_data_retrieval)

# Stage 4: Load modules
self._load_modules(self.directory, module_loader_registry, dir_filter, module_load_context,
keys_referenced_as_modules)

# Stage 5: Variable resolution round 2 - now with modules
if self.evaluate_variables:
self._process_vars_and_locals(directory, var_value_and_file_map, module_data_retrieval)
# This stage needs to be done in a loop (again... alas, no DAG) because modules might not
# be loadable until other modules are loaded. This happens when parameters to one module
# depend on the output of another. For such cases, the base module must be loaded, then
# a parameter resolution pass needs to happen, then the second module can be loaded.
#
# One gotcha is that we need to make sure we load all modules at some point, even if their
# parameters don't resolve. So, if we hit a spot where resolution doesn't change anything
# and there are still modules to be loaded, they will be forced on the next pass.
force_final_module_load = False
for i in range(0, 10): # circuit breaker - no more than 10 loops
logging.debug("Module load loop %d", i)

# Stage 4a: Load eligible modules
has_more_modules = self._load_modules(directory, module_loader_registry,
dir_filter, module_load_context,
keys_referenced_as_modules,
force_final_module_load)

# Stage 4b: Variable resolution round 2 - now with (possibly more) modules
made_var_changes = False
if self.evaluate_variables:
made_var_changes = self._process_vars_and_locals(directory, var_value_and_file_map,
module_data_retrieval)
if not has_more_modules:
break # nothing more to do
elif not made_var_changes:
# If there are more modules to load but no variables were resolved, then to a final module
# load, forcing things through without complete resolution.
force_final_module_load = True

def _process_vars_and_locals(self, directory: str,
var_value_and_file_map: Dict[str, Tuple[Any, str]],
module_data_retrieval: Callable[[str], Dict[str, Any]]):
module_data_retrieval: Callable[[str], Dict[str, Any]]) -> bool:
locals_values = {}
for file_data in self.out_definitions.values():
file_locals = file_data.get("locals")
Expand All @@ -276,8 +299,10 @@ def _process_vars_and_locals(self, directory: str,
# More than a couple loops isn't normally expected.
# NOTE: If this approach proves to be a performance liability, a DAG will be needed.
loop_count = 0
made_change_in_any_loop = False
for i in range(0, 25):
loop_count += 1
logging.debug("Parser loop %d", i)

made_change = False
# Put out file layer here so the context works inside the loop
Expand All @@ -291,21 +316,22 @@ def _process_vars_and_locals(self, directory: str,
# }

if self._process_vars_and_locals_loop(file_data,
eval_context_dict,
os.path.relpath(file, directory),
var_value_and_file_map, locals_values,
file_data.get("resource"),
file_data.get("module"),
module_data_retrieval,
directory):
eval_context_dict,
os.path.relpath(file, directory),
var_value_and_file_map, locals_values,
file_data.get("resource"),
file_data.get("module"),
module_data_retrieval,
directory):
made_change = True

if len(eval_context_dict) == 0:
del self.out_evaluations_context[file]
made_change_in_any_loop = made_change_in_any_loop or made_change
if not made_change:
break

LOGGER.debug("Processing variables took %d loop iterations", loop_count)
return made_change_in_any_loop

def _process_vars_and_locals_loop(self, out_definitions: Dict,
eval_map_by_var_name: Dict[str, EvaluationContext], relative_file_path: str,
Expand Down Expand Up @@ -387,10 +413,10 @@ def process_items_helper(key_value_iterator, data_map, context, allow_str_bool_t
made_change = True
elif isinstance(value, dict):
if self._process_vars_and_locals_loop(value, eval_map_by_var_name, relative_file_path,
var_value_and_file_map,
locals_values, resource_list,
module_list, module_data_retrieval, root_directory,
new_context):
var_value_and_file_map,
locals_values, resource_list,
module_list, module_data_retrieval, root_directory,
new_context):
made_change = True

elif isinstance(value, list):
Expand All @@ -406,10 +432,24 @@ def process_items_helper(key_value_iterator, data_map, context, allow_str_bool_t

def _load_modules(self, root_dir: str, module_loader_registry: ModuleLoaderRegistry,
dir_filter: Callable[[str], bool], module_load_context: Optional[str],
keys_referenced_as_modules: Set[str]):
keys_referenced_as_modules: Set[str], ignore_unresolved_params: bool = False) -> bool:
"""
Load modules which have not already been loaded and can be loaded (don't have unresolved parameters).
:param ignore_unresolved_params: If true, not-yet-loaded modules will be loaded even if they are
passed parameters that are not fully resolved.
:return: True if there were modules that were not loaded due to unresolved
parameters.
"""
all_module_definitions = {}
all_module_evaluations_context = {}
skipped_a_module = False
for file in list(self.out_definitions.keys()):
# Don't process a file in a directory other than the directory we're processing. For example,
# if we're down dealing with <top_dir>/<module>/something.tf, we don't want to rescan files
# up in <top_dir>.
if os.path.dirname(file) != root_dir:
continue
# Don't process a file reference which has already been processed
if file.endswith("]"):
continue
Expand All @@ -422,10 +462,6 @@ def _load_modules(self, root_dir: str, module_loader_registry: ModuleLoaderRegis
continue

for module_index, module_call in enumerate(module_calls):
module_address = (file, module_index)
if module_address in self._loaded_modules:
continue
self._loaded_modules.add(module_address)

if not isinstance(module_call, dict):
continue
Expand All @@ -435,6 +471,25 @@ def _load_modules(self, root_dir: str, module_loader_registry: ModuleLoaderRegis
if not isinstance(module_call_data, dict):
continue

module_address = (file, module_index, module_call_name)
if module_address in self._loaded_modules:
continue

# Variables being passed to module, "source" and "version" are reserved
specified_vars = {k: v[0] for k, v in module_call_data.items()
if k != "source" and k != "version"}

if not ignore_unresolved_params:
has_unresolved_params = False
for k, v in specified_vars.items():
if not is_acceptable_module_param(v) or not is_acceptable_module_param(k):
has_unresolved_params = True
break
if has_unresolved_params:
skipped_a_module = True
continue
self._loaded_modules.add(module_address)

source = module_call_data.get("source")
if not source or not isinstance(source, list):
continue
Expand All @@ -453,10 +508,6 @@ def _load_modules(self, root_dir: str, module_loader_registry: ModuleLoaderRegis
if not content.loaded():
continue

# Variables being passed to module, "source" and "version" are reserved
specified_vars = {k: v[0] for k, v in module_call_data.items()
if k != "source" and k != "version"}

self._internal_dir_load(directory=content.path(),
module_loader_registry=module_loader_registry,
dir_filter=dir_filter, specified_vars=specified_vars,
Expand Down Expand Up @@ -512,6 +563,7 @@ def _load_modules(self, root_dir: str, module_loader_registry: ModuleLoaderRegis
if all_module_definitions:
deep_merge.merge(self.out_definitions, all_module_definitions)
deep_merge.merge(self.out_evaluations_context, all_module_evaluations_context)
return skipped_a_module


def _handle_single_var_pattern(orig_variable: str, var_value_and_file_map: Dict[str, Tuple[Any, str]],
Expand Down Expand Up @@ -781,3 +833,29 @@ def function_nyi_handler(original, function_name, **_):
"feature request if it is important to your evaluation (value: '%s')",
function_name, original)
return FUNCTION_FAILED


def is_acceptable_module_param(value: Any) -> bool:
"""
This function determines if a value should be passed to a module as a parameter. We don't want to pass
unresolved var, local or module references because they can't be resolved from the module, so they need
to be resolved prior to being passed down.
"""
if isinstance(value, dict):
for k, v in value.items():
if not is_acceptable_module_param(v) or not is_acceptable_module_param(k):
return False
return True
if isinstance(value, set) or isinstance(value, list):
for v in value:
if not is_acceptable_module_param(v):
return False
return True

if not isinstance(value, str):
return True

for vbm in find_var_blocks(value):
if vbm.is_simple_var():
return False
return True
48 changes: 48 additions & 0 deletions checkov/terraform/parser_utils.py
@@ -1,11 +1,16 @@
import json
import re
from dataclasses import dataclass
from enum import Enum, IntEnum
from typing import Any, Dict, List, Optional

import hcl2


_FUNCTION_NAME_CHARS = frozenset("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")

_ARG_VAR_PATTERN = re.compile(r"[a-zA-Z_]+(\.[a-zA-Z_]+)+")

@dataclass
class VarBlockMatch:
full_str: str # Example: ${local.foo}
Expand All @@ -15,6 +20,13 @@ def replace(self, original: str, replaced: str):
self.full_str = self.full_str.replace(original, replaced)
self.var_only = self.var_only.replace(original, replaced)

def is_simple_var(self):
"""
Indicates whether or not the value of the var block matches a "simple" var pattern. For example:
local.blah, var.foo, module.one.a_resource.
"""
return _ARG_VAR_PATTERN.match(self.var_only) is not None


class ParserMode(Enum):
# Note: values are just for debugging.
Expand Down Expand Up @@ -46,8 +58,12 @@ def find_var_blocks(value: str) -> List[VarBlockMatch]:

mode_stack: List[ParserMode] = []
eval_start_pos_stack: List[int] = [] # location of first char inside brackets
param_start_pos_stack: List[int] = [] # location of open parens
preceding_dollar = False
preceding_string_escape = False
# NOTE: function calls can be nested, but since param args are only being inspected for variables,
# it's alright to ignore outer calls.
param_arg_start = -1
for index, c in enumerate(value):
current_mode = " " if not mode_stack else mode_stack[-1]

Expand Down Expand Up @@ -86,7 +102,31 @@ def find_var_blocks(value: str) -> List[VarBlockMatch]:
elif c == "]" and current_mode == ParserMode.ARRAY:
mode_stack.pop()
elif c == ")" and current_mode == ParserMode.PARAMS:
if param_arg_start > 0:
param_arg = value[param_arg_start: index].strip()
if _ARG_VAR_PATTERN.match(param_arg):
to_return.append(VarBlockMatch(param_arg, param_arg))
param_arg_start = -1

mode_stack.pop()
start_pos = param_start_pos_stack.pop()
# See if these params are for a function call. Back up from the index to determine if there's
# a function preceding.
function_name_start_index = start_pos
for function_index in range(start_pos - 1, 0, -1):
if value[function_index] in _FUNCTION_NAME_CHARS:
function_name_start_index = function_index
else:
break
# We know now there's a function call here. But, don't call it out if it's directly wrapped
# in eval markers.
in_eval_markers = False
if function_name_start_index >= 2:
in_eval_markers = value[function_name_start_index - 2] == "$" and \
value[function_name_start_index - 1] == "{"
if function_name_start_index < start_pos and not in_eval_markers:
to_return.append(VarBlockMatch(value[function_name_start_index: index + 1],
value[function_name_start_index: index + 1]))
elif c == '"':
if preceding_string_escape:
preceding_string_escape = False
Expand All @@ -113,6 +153,14 @@ def find_var_blocks(value: str) -> List[VarBlockMatch]:
elif c == "(": # do we care?
if not ParserMode.is_string(current_mode):
mode_stack.append(ParserMode.PARAMS)
param_start_pos_stack.append(index)
param_arg_start = index + 1
elif c == ",":
if current_mode == ParserMode.PARAMS and param_arg_start > 0:
param_arg = value[param_arg_start: index].strip()
if _ARG_VAR_PATTERN.match(param_arg):
to_return.append(VarBlockMatch(param_arg, param_arg))
param_arg_start = index + 1
elif c == "?" and current_mode == ParserMode.EVAL: # ternary
# If what's been processed in the ternary so far is "true" or "false" (boolean or string type)
# then nothing special will happen here and only the full expression will be returned.
Expand Down
6 changes: 4 additions & 2 deletions integration_tests/test_checkov_json_report.py
Expand Up @@ -34,8 +34,10 @@ def test_checkov_report_terragoat_with_skip(self):
def validate_report(self, report_path):
with open(report_path) as json_file:
data = json.load(json_file)
self.assertEqual(data["summary"]["parsing_errors"], 0, "expecting 0 parsing errors")
self.assertGreater(data["summary"]["failed"], 1, "expecting more then 1 failed checks")
self.assertEqual(data["summary"]["parsing_errors"], 0,
f"expecting 0 parsing errors but got: {data['results']['parsing_errors']}")
self.assertGreater(data["summary"]["failed"], 1,
f"expecting more than 1 failed checks, got: {data['summary']['failed']}")

def validate_json_quiet(self):
report_path = current_dir + "/../checkov_report_cfngoat_quiet.json"
Expand Down
@@ -0,0 +1,8 @@
variable "tags" {}


resource "aws_s3_bucket" "bucket" {
bucket = "its.a.bucket"
# NOTE: Prior to find_var_blocks handling vars in parameters, this didn't work
tags = merge(var.tags, {"more_tags" = "yes"})
}
@@ -0,0 +1,6 @@
output "tags" {
value = {
Team = "my_team"
Color = "red"
}
}

0 comments on commit 38473ea

Please sign in to comment.