Skip to content
This repository has been archived by the owner on Oct 5, 2019. It is now read-only.

Commit

Permalink
Merge branch 'alternative_blacklist_inputs'
Browse files Browse the repository at this point in the history
  • Loading branch information
piax93 committed Jun 19, 2019
2 parents 5fdac19 + f9cbe50 commit 0c91586
Show file tree
Hide file tree
Showing 13 changed files with 113 additions and 60 deletions.
13 changes: 7 additions & 6 deletions osxcollector/output_filters/find_blacklisted.py
Expand Up @@ -23,17 +23,18 @@ class FindBlacklistedFilter(OutputFilter):
blacklist_name - [REQUIRED] the name of the blacklist
blacklist_keys - [REQUIRED] get the value of these keys and compare against the blacklist
blacklist_is_regex - [REQUIRED] should the values in the blacklist file be treated as regex
blacklist_file_path - [REQUIRED] path to a file with the actual values to blacklist
blacklist_file_path - [REQUIRED if no blacklist_data_feed] path to a file with the actual values to blacklist
blacklist_data_feed - [REQUIRED if no blacklist_file_path] name of the data feed from which data is read
blacklist_is_domains - [OPTIONAL] interpret values as domains and do some smart regex and subdomain stuff with them
"""

def __init__(self, **kwargs):
super(FindBlacklistedFilter, self).__init__(**kwargs)
self._blacklists = self._init_blacklists()

def _init_blacklists(self):
"""Reads the config and builds a list of blacklists."""
return [create_blacklist(config_chunk) for config_chunk in config_get_deep('blacklists')]
data_feeds = kwargs.get('data_feeds', {})
self._blacklists = [
create_blacklist(config_chunk, data_feeds)
for config_chunk in config_get_deep('blacklists')
]

def filter_line(self, blob):
"""Find blacklisted values in a line.
Expand Down
4 changes: 3 additions & 1 deletion osxcollector/output_filters/opendns/lookup_domains.py
Expand Up @@ -77,7 +77,9 @@ def __init__(self, lookup_when=None, **kwargs):
'osxcollector_domains', 'osxcollector_opendns',
lookup_when=lookup_when, name_of_api_key='opendns', **kwargs
)
self._whitelist = create_blacklist(config_get_deep('domain_whitelist'))
self._whitelist = create_blacklist(
config_get_deep('domain_whitelist'), kwargs.get('data_feeds', {}),
)

def _lookup_iocs(self, all_iocs):
"""Caches the OpenDNS info for a set of domains.
Expand Down
4 changes: 3 additions & 1 deletion osxcollector/output_filters/opendns/related_domains.py
Expand Up @@ -67,7 +67,9 @@ def __init__(
the list of related domains.
"""
super(RelatedDomainsFilter, self).__init__(**kwargs)
self._whitelist = create_blacklist(config_get_deep('domain_whitelist'))
self._whitelist = create_blacklist(
config_get_deep('domain_whitelist'), kwargs.get('data_feeds', {}),
)

cache_file_name = config_get_deep('opendns.RelatedDomainsFilter.cache_file_name', None)
self._investigate = InvestigateApi(config_get_deep('api_key.opendns'), cache_file_name=cache_file_name)
Expand Down
76 changes: 40 additions & 36 deletions osxcollector/output_filters/util/blacklist.py
Expand Up @@ -6,8 +6,8 @@
from __future__ import unicode_literals

import logging
import os
import re
from collections import namedtuple

import six

Expand All @@ -17,19 +17,20 @@
from osxcollector.output_filters.util.domains import clean_domain


def create_blacklist(config_chunk):
def create_blacklist(config_chunk, data_feeds={}):
"""Reads the config and builds a Blacklist.
The blacklist config is sufficiently complex that much of this method deals with simply validating config
Args:
config_chunk: A dict of config for building the blacklist
data_feeds: Dict of generator functions returning the blacklist data
Returns:
A Blacklist
Raises:
MissingConfigError - when required key does not exist.
"""
required_keys = ['blacklist_name', 'blacklist_keys', 'blacklist_file_path']
required_keys = ['blacklist_name', 'blacklist_keys']
if not all([key in config_chunk for key in required_keys]):
raise MissingConfigError('Blacklist config is missing a required key.\nRequired keys are: {0}'.format(repr(required_keys)))

Expand All @@ -39,17 +40,34 @@ def create_blacklist(config_chunk):
blacklist_name = config_chunk.get('blacklist_name')
blacklist_keys = config_chunk.get('blacklist_keys')
blacklist_file_path = config_chunk.get('blacklist_file_path')
blacklist_data_feed = config_chunk.get('blacklist_data_feed')
if blacklist_file_path:
if not os.path.exists(blacklist_file_path):
raise MissingConfigError('The blacklist file {} does not exist'.format(blacklist_file_path))
blacklist_data_generator = _read_blacklist_file(blacklist_file_path)
elif blacklist_data_feed:
if blacklist_data_feed not in data_feeds:
raise MissingConfigError('Data feed {} not found among provided generators'.format(blacklist_data_feed))
blacklist_data_generator = data_feeds[blacklist_data_feed]()
else:
raise MissingConfigError('Blacklist config is missing a data input.\nEither select a file or a generator object')
blacklist_is_regex = config_chunk.get('blacklist_is_regex', False)
blacklist_is_domains = config_chunk.get('blacklist_is_domains', False)
return Blacklist(blacklist_name, blacklist_keys, blacklist_file_path, blacklist_is_regex, blacklist_is_domains)
return Blacklist(blacklist_name, blacklist_keys, blacklist_data_generator, blacklist_is_regex, blacklist_is_domains)


MatchingTerm = namedtuple('MatchingTerm', ['display_name', 'term'])
def _read_blacklist_file(filepath):
    """Yield the meaningful entries of a blacklist file, one per line.

    Every line is stripped of surrounding whitespace. Blank lines and lines
    whose first non-blank character is '#' (comments) are skipped.

    The file is decoded explicitly as UTF-8 (a leading byte-order mark is
    tolerated and dropped) so the yielded values are text strings that do not
    depend on the platform's default encoding.

    Args:
        filepath: Path of the blacklist file to read
    Yields:
        Each non-empty, non-comment line as a stripped text string
    Raises:
        IOError - when the file cannot be opened.
        UnicodeDecodeError - when the file content is not valid UTF-8.
    """
    # Imported locally so the function does not rely on the module's import block.
    import io

    # 'utf-8-sig' reads plain UTF-8 as well, and strips a BOM if one is present;
    # otherwise the BOM would stick to the first line and hide a leading '#'.
    with io.open(filepath, 'r', encoding='utf-8-sig') as f:
        for line in f:
            line = line.strip()
            if line and not line.startswith('#'):
                yield line


class Blacklist(object):

def __init__(self, name, blacklisted_keys, file_path, is_regex=False, is_domains=False):
def __init__(self, name, blacklisted_keys, input_generator, is_regex=False, is_domains=False):
"""Build a blacklist from the data in the blacklist file.
Built in smarts make it easy to build a blacklist of domains
Expand All @@ -58,27 +76,13 @@ def __init__(self, name, blacklisted_keys, file_path, is_regex=False, is_domains
MissingConfigError - when required config key does not exist.
"""
self._name = name
self._file_path = file_path
self._blacklisted_keys = blacklisted_keys
self._is_domains = is_domains
self._is_regex = is_regex or self._is_domains
self._blacklisted_values = []

for line in self._read_blacklist_file_contents():
if not line.startswith('#'):
line = line.strip()
if line:
self._blacklisted_values.append(line)

self._blacklisted_values = [self._convert_to_matching_term(val) for val in self._blacklisted_values]
self._blacklisted_values = [x for x in self._blacklisted_values if x]

def _read_blacklist_file_contents(self):
try:
with open(self._file_path, 'r') as value_file:
return value_file.readlines()
except IOError as e:
raise MissingConfigError(str(e))
self._blacklisted_values = dict(
self._convert_to_matching_term(val) for val in input_generator if val
)
self._blacklisted_values.pop(None, None)

def _convert_to_matching_term(self, blacklisted_value):
"""Convert a blacklisted_value to a regex.
Expand All @@ -101,14 +105,14 @@ def _convert_to_matching_term(self, blacklisted_value):
u'Blacklisted value "{0}" cannot be resolved as a domain name'
.format(blacklisted_value),
)
return None
return None, None

blacklisted_value = r'^(.+\.)*{0}$'.format(re.escape(domain))
blacklisted_value = re.compile(r'^(.+\.)*{0}$'.format(re.escape(domain)))

if self._is_regex:
elif self._is_regex:
blacklisted_value = re.compile(blacklisted_value)

return MatchingTerm(display_name, blacklisted_value)
return blacklisted_value, display_name

def match_line(self, blob):
"""Determines whether a line matches the blacklist.
Expand Down Expand Up @@ -137,14 +141,14 @@ def match_values(self, values):
values = [values]

for val in values:
for matching_term in self._blacklisted_values:
if self._is_regex:
if matching_term.term.search(val):
return matching_term.display_name
else:
if matching_term.term == val:
return matching_term.display_name

if self._is_regex or self._is_domains:
return next(
(
name for term, name in six.iteritems(self._blacklisted_values) if term.search(val)
), None,
)
else:
return self._blacklisted_values.get(val, None)
return None

@property
Expand Down
6 changes: 3 additions & 3 deletions osxcollector/output_filters/util/config.py
Expand Up @@ -9,9 +9,9 @@

import yaml
try:
from yaml import CLoader as Loader
from yaml import CSafeLoader as SafeLoader
except ImportError:
from yaml import Loader
from yaml import SafeLoader

from osxcollector.output_filters.exceptions import MissingConfigError
from osxcollector.output_filters.util.dict_utils import DictUtils
Expand All @@ -36,7 +36,7 @@ def _read_config():
dict of config
"""
with open(_config_file_path()) as source:
return yaml.load(source.read(), Loader=Loader)
return yaml.load(source.read(), Loader=SafeLoader)


def _config_file_path():
Expand Down
4 changes: 3 additions & 1 deletion osxcollector/output_filters/virustotal/lookup_domains.py
Expand Up @@ -23,7 +23,9 @@ def __init__(self, lookup_when=None, **kwargs):
'osxcollector_domains', 'osxcollector_vtdomain',
lookup_when=lookup_when, name_of_api_key='virustotal', **kwargs
)
self._whitelist = create_blacklist(config_get_deep('domain_whitelist'))
self._whitelist = create_blacklist(
config_get_deep('domain_whitelist'), kwargs.get('data_feeds', {}),
)

def _lookup_iocs(self, all_iocs, resource_per_req=25):
"""Caches the VirusTotal info for a set of domains.
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
@@ -1,3 +1,4 @@
PyYAML==5.1
simplejson==3.10.0
six==1.12.0
threat_intel==0.1.29
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Expand Up @@ -8,7 +8,7 @@

setup(
name='osxcollector_output_filters',
version='1.1.0',
version='1.1.1',
author='Yelp Security',
author_email='opensource@yelp.com',
description='Filters that process and transform the output of OSXCollector',
Expand All @@ -20,6 +20,7 @@
packages=find_packages(exclude=['tests']),
provides=['osxcollector'],
install_requires=[
'PyYAML>=5.0',
'threat_intel',
'tldextract',
'simplejson',
Expand Down

Large diffs are not rendered by default.

@@ -1 +1 @@
{"shadowserver-bin-test": {"816a85d89ae34d2dc73b8c768eecb03935c568ba": {"sha1": "816a85d89ae34d2dc73b8c768eecb03935c568ba"}, "5d87de61cb368c93325dd910c202b8647f8e90ed": {"source": "MacAppInfo", "bit": "64", "md5": "6746005C822CEB6737B871698D3ED22F", "language": "English", "os_name": "Mac OS X 10.10 (build 14A389)", "sha256": "1FAFE48F626FDC030B0A0EFC1008D51CD3078D1B3EC95F808D12AFBFEF458B23", "source_version": "1.1", "os_mfg": "Apple Inc.", "crc32": "5332564F", "reference": "os_all", "filesize": "48976", "dirname": "/System/Library/Extensions/System.kext/PlugIns/Libkern.kext", "filename": "Libkern", "sha1": "5D87DE61CB368C93325DD910C202B8647F8E90ED", "filetimestamp": "09/19/2014 00:42:35", "binary": "1", "sha512": "C1CAEB26F892FE3C00B3B6BAB462058C772F91824092BF9B2E183F66D885278B6F0C6DA65D06994A45166501F1A889E38D5D234AE18ECBD2EF3CFD9F4388DC8F", "os_version": "10.10", "application_type": "Mach-O 64-bit kext bundle x86_64"}}}
{"shadowserver-bin-test": {"5d87de61cb368c93325dd910c202b8647f8e90ed": {"os_version": "10.10", "filesize": "48976", "reference": "os_all", "sha1": "5D87DE61CB368C93325DD910C202B8647F8E90ED", "dirname": "/System/Library/Extensions/System.kext/PlugIns/Libkern.kext", "binary": "1", "sha256": "1FAFE48F626FDC030B0A0EFC1008D51CD3078D1B3EC95F808D12AFBFEF458B23", "filetimestamp": "09/19/2014 00:42:35", "source": "MacAppInfo", "sha512": "C1CAEB26F892FE3C00B3B6BAB462058C772F91824092BF9B2E183F66D885278B6F0C6DA65D06994A45166501F1A889E38D5D234AE18ECBD2EF3CFD9F4388DC8F", "language": "English", "md5": "6746005C822CEB6737B871698D3ED22F", "bit": "64", "filename": "Libkern", "os_name": "Mac OS X 10.10 (build 14A389)", "application_type": "Mach-O 64-bit kext bundle x86_64", "crc32": "5332564F", "os_mfg": "Apple Inc.", "source_version": "1.1"}, "816a85d89ae34d2dc73b8c768eecb03935c568ba": {"sha1": "816a85d89ae34d2dc73b8c768eecb03935c568ba"}}}

0 comments on commit 0c91586

Please sign in to comment.