Skip to content

Commit

Permalink
fixing storage of empty values in normalization (#953)
Browse files Browse the repository at this point in the history
  • Loading branch information
ryandeivert committed Jun 21, 2019
1 parent 47ebb6a commit 7f0c20f
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 31 deletions.
21 changes: 17 additions & 4 deletions stream_alert/shared/normalize.py
Expand Up @@ -57,10 +57,23 @@ def match_types(cls, record, normalized_types):
'region': ['us-east-1', 'us-west-2']
}
"""
return {
key: sorted(set(cls._extract_values(record, set(keys_to_normalize))))
for key, keys_to_normalize in normalized_types.iteritems()
}
result = {}
for key, keys_to_normalize in normalized_types.iteritems():
values = set()
for value in cls._extract_values(record, set(keys_to_normalize)):
# Skip empty values
if value is None or value == '':
continue

values.add(value)

if not values:
continue

result[key] = sorted(values)

return result


@classmethod
def _extract_values(cls, record, keys_to_normalize):
Expand Down
61 changes: 34 additions & 27 deletions tests/unit/stream_alert_shared/test_normalizer.py
Expand Up @@ -139,38 +139,45 @@ def test_normalize_none_defined(self, log_mock):
Normalizer.normalize(self._test_record(), log_type)
log_mock.assert_called_with('No normalized types defined for log type: %s', log_type)

def test_normalize_bad_normalized_key(self):
"""Normalizer - Normalize, Bad Key(s)"""
log_type = 'cloudtrail'
bad_types = {
'bad_key_01',
'bad_key_02'
def test_key_does_not_exist(self):
"""Normalizer - Normalize, Key Does Not Exist"""
test_record = {
'accountId': 123456,
'region': 'region_name'
}
Normalizer._types_config = {
log_type: {
'bad_type': bad_types
}

normalized_types = {
'region': ['region', 'awsRegion'],
'sourceAccount': ['account', 'accountId'],
# There is no IP value in record, so normalization should not include this
'ipv4': ['sourceIPAddress']
}
expected_record = {
expected_results = {
'sourceAccount': [123456],
'region': ['region_name']
}

results = Normalizer.match_types(test_record, normalized_types)
assert_equal(results, expected_results)

def test_empty_value(self):
"""Normalizer - Normalize, Empty Value"""
test_record = {
'account': 123456,
'region': 'region_name',
'detail': {
'awsRegion': 'region_name',
'source': '1.1.1.2',
'userIdentity': {
"userName": "Alice",
"invokedBy": "signin.amazonaws.com"
}
},
'sourceIPAddress': '1.1.1.3',
'streamalert:normalization': {
'bad_type': list(),
}
'region': '' # This value is empty so should not be stored
}

record = self._test_record()
Normalizer.normalize(record, log_type)
assert_equal(record, expected_record)
normalized_types = {
'region': ['region', 'awsRegion'],
'sourceAccount': ['account', 'accountId'],
'ipv4': ['sourceIPAddress']
}
expected_results = {
'sourceAccount': [123456]
}

results = Normalizer.match_types(test_record, normalized_types)
assert_equal(results, expected_results)

def test_get_values_for_normalized_type(self):
"""Normalizer - Get Values for Normalized Type"""
Expand Down

0 comments on commit 7f0c20f

Please sign in to comment.