Merge 7a71f13 into 90ba6c3

chapinb · Oct 3, 2020 · 365e990 · 365e990
2 parents 90ba6c3 + 7a71f13
commit 365e990
Show file tree

Hide file tree

Showing 18 changed files with 363 additions and 147 deletions.
diff --git a/.github/images/interrogate.svg b/.github/images/interrogate.svg
diff --git a/README.md b/README.md
@@ -19,12 +19,14 @@ Yet another IP address enrichment tool.
 [![Total alerts](https://img.shields.io/lgtm/alerts/g/chapinb/chickadee.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/chapinb/chickadee/alerts/)
 [![Language grade: Python](https://img.shields.io/lgtm/grade/python/g/chapinb/chickadee.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/chapinb/chickadee/context:python)
 ![Unit Tests](https://github.com/chapinb/chickadee/workflows/Unit%20Tests/badge.svg)
+![Docstring Coverage](.github/images/interrogate.svg)
 [![Coverage Status](https://coveralls.io/repos/github/chapinb/chickadee/badge.svg)](https://coveralls.io/github/chapinb/chickadee)
 [![MIT Licence](https://badges.frapsoft.com/os/mit/mit.svg?v=103)](https://opensource.org/licenses/mit-license.php)
 [![PyPI version](https://badge.fury.io/py/chickadee.svg)](https://badge.fury.io/py/chickadee)
 [![PyPi downloads](https://pypip.in/d/chickadee/badge.png)](https://pypistats.org/packages/chickadee)
 [![DeepSource](https://static.deepsource.io/deepsource-badge-light-mini.svg)](https://deepsource.io/gh/chapinb/chickadee/?ref=repository-badge)
 
+
 Supported IP address resolvers:
 
 * https://ip-api.com/ - Free to query up to 45 requests per minute. Unlimited

diff --git a/dev-requirements.txt b/dev-requirements.txt
@@ -1,6 +1,6 @@
-setuptools~=47.3.1
-twine~=3.1.1
-wheel~=0.34.2
-coverage~=5.1
-flake8~=3.8.3
-sphinx~=3.1.1
+setuptools~=50.3.0
+twine~=3.2.0
+wheel~=0.35.1
+coverage~=5.3
+flake8~=3.8.4
+sphinx~=3.2.1
diff --git a/libchickadee/chickadee.py b/libchickadee/chickadee.py
@@ -193,7 +193,7 @@ class Chickadee(object):
     """Class to handle chickadee script operations.
 
     Args:
-        outformat (str): One of ``json``, ``jsonl``, ``csv``
+        out_format (str): One of ``json``, ``jsonl``, ``csv``
         outfile (str or file_obj): Destination to write report.
         fields (list): Collection of fields to resolve and report on.
 
@@ -206,17 +206,18 @@ class Chickadee(object):
         >>> print(resolution)
 
     """
-    def __init__(self, outformat='json', outfile=sys.stdout, fields=None):
+    def __init__(self, out_format='json', outfile=sys.stdout, fields=None):
+        """Initialize class values and parameters to provided or default values"""
         self.resolver = 'ip_api'
         self.input_data = None
-        self.outformat = outformat
+        self.out_format = out_format
         self.outfile = outfile
         self.fields = fields
         self.force_single = False
         self.ignore_bogon = True
         self.no_count = False
         self.lang = 'en'
-        self.pbar = False
+        self.progress_bar = False
         self.resolve_ips = True
 
     def run(self, input_data, api_key=None):
@@ -269,7 +270,7 @@ def run(self, input_data, api_key=None):
     def get_api_key():
         """DEPRECATED
 
-        Retrieve an API key set as an envar. Looks for value in
+        Retrieve an API key set as an environment variable. Looks for value in
         ``CHICKADEE_API_KEY``. May be deprecated in the near future.
 
         Returns:
@@ -355,8 +356,8 @@ def dir_handler(self, folder_path):
         """
         result_dict = {}
         for root, _, files in os.walk(folder_path):
-            for fentry in files:
-                file_entry = os.path.join(root, fentry)
+            for file_name in files:
+                file_entry = os.path.join(root, file_name)
                 logger.debug("Parsing file {}".format(file_entry))
                 file_results = self.file_handler(file_entry, self.ignore_bogon)
                 logger.debug("Parsed file {}, {} results".format(
@@ -382,14 +383,14 @@ def resolve(self, data_dict, api_key=None):
 
         resolver = self.get_resolver(api_key)
 
-        if self.pbar:
-            resolver.pbar = self.pbar
+        if self.progress_bar:
+            resolver.pbar = self.progress_bar
 
         logger.debug("Resolving IPs")
         if self.force_single:
             results = []
             data = distinct_ips
-            if self.pbar:
+            if self.progress_bar:
                 data = tqdm(distinct_ips, desc="Resolving IPs",
                             unit_scale=True)
 
@@ -414,6 +415,14 @@ def resolve(self, data_dict, api_key=None):
         return results
 
     def get_resolver(self, api_key):
+        """Determine the proper resolver to use, based on the available API keys.
+
+        Args:
+            api_key (str): API key value to register with the resolver
+
+        Returns:
+            Instance of an initialized resolver
+        """
         resolvers = {
             "ip_api": {
                 "pro_resolver": ipapi.ProResolver,
@@ -449,7 +458,7 @@ def write_output(self, results):
 
         Leverages the writers found in libchickadee.resolvers. Currently
         supports csv, json, and json lines formats, specified in
-        ``self.outformat``.
+        ``self.out_format``.
 
         Args:
             results (list): List of GeoIP results
@@ -458,13 +467,13 @@ def write_output(self, results):
             None
         """
 
-        if self.outformat == 'csv':
+        if self.out_format == 'csv':
             logger.debug("Writing CSV report")
             ResolverBase.write_csv(self.outfile, results, self.fields)
-        elif self.outformat == 'json':
+        elif self.out_format == 'json':
             logger.debug("Writing json report")
             ResolverBase.write_json(self.outfile, results, self.fields)
-        elif self.outformat == 'jsonl':
+        elif self.out_format == 'jsonl':
             logger.debug("Writing json lines report")
             ResolverBase.write_json(self.outfile, results, self.fields, lines=True)
 
@@ -562,6 +571,15 @@ def config_handing(config_file=None, search_conf_path=None):
 
 
 def parse_config_sections(conf, section_defs):
+    """Parse the sections of the configuration file
+
+    Args:
+        conf (dict): Loaded configuration file information
+        section_defs (dict): Mapping of configuration file values and defaults
+
+    Returns:
+        (dict): Final configuration to use with the script execution
+    """
     config = {}
     for section, value in section_defs.items():
         if section not in conf:
@@ -582,6 +600,15 @@ def parse_config_sections(conf, section_defs):
 
 
 def find_config_file(search_conf_path=None, filename_patterns=None):
+    """Handles the search operations for identifying configuration files on the system
+
+    Args:
+        search_conf_path (str): Path to look for a configuration file
+        filename_patterns (list): Patterns to use to find a configuration file
+
+    Returns:
+        (str): The path to the first identified configuration file.
+    """
     if not filename_patterns:
         # Needs to end with chickadee.ini or .chickadee.ini for detection.
         filename_patterns = ['chickadee.ini']
@@ -600,10 +627,16 @@ def find_config_file(search_conf_path=None, filename_patterns=None):
 
 
 def _generate_default_config_search_path():
-    # Config file search path order:
-    # 1. Current directory
-    # 2. User home directory
-    # 3. System wide directory
+    """This function dynamically populates the order in which to locate a configuration file.
+
+    Config file search path order:
+      1. Current directory
+      2. User home directory
+      3. System wide directory
+
+    Returns:
+        (list): Ordered list of paths to look for configuration files in
+    """
     search_conf_path = [os.path.abspath('.'), os.path.expanduser('~')]
     if 'win32' in sys.platform:
         search_conf_path.append(
@@ -781,7 +814,7 @@ def entry(args=None):  # pragma: no cover
     chickadee.no_count = params.get('no-count')
     chickadee.force_single = params.get('single')
     chickadee.lang = params.get('lang')
-    chickadee.pbar = params.get('progress')
+    chickadee.progress_bar = params.get('progress')
 
     logger.debug("Parsing input")
     if isinstance(params.get('data'), list):
@@ -794,7 +827,7 @@ def entry(args=None):  # pragma: no cover
 
     logger.debug("Writing output")
     chickadee.outfile = params.get('output-file')
-    chickadee.outformat = params.get('output-format')
+    chickadee.out_format = params.get('output-format')
     chickadee.write_output(data)
 
     logger.debug("Chickadee complete")

diff --git a/libchickadee/parsers/__init__.py b/libchickadee/parsers/__init__.py
@@ -63,6 +63,7 @@
 
 
 def run_parser_from_cli(args, parser_obj):  # pragma: no cover
+    """Allow a parser to run from the command line, both for testing and increased usability."""
     if os.path.isdir(args.path):
         for root, _, files in os.walk(args.path):
             for fentry in files:
@@ -77,6 +78,7 @@ def run_parser_from_cli(args, parser_obj):  # pragma: no cover
 class ParserBase(object):
     """Base class for parsers, containing common utilities."""
     def __init__(self, ignore_bogon=True):
+        """Configure the parser and set default values."""
         self.ignore_bogon = ignore_bogon
         self.ips = {}
 

diff --git a/libchickadee/parsers/evtx.py b/libchickadee/parsers/evtx.py
@@ -6,9 +6,10 @@
 
 
 class EVTXParser(ParserBase):
+    """Class to expose EVTX record contents for IP address extraction"""
     def __init__(self, ignore_bogon=True):
+        """Initialize the class and set defaults."""
         super().__init__(ignore_bogon)
-        self.ips = {}
 
     def parse_file(self, file_entry, is_stream=False):
         """Parse EVTX contents. Must be a path to an existing EVTX file.

diff --git a/libchickadee/parsers/plain_text.py b/libchickadee/parsers/plain_text.py
@@ -26,8 +26,8 @@ class PlainTextParser(ParserBase):
     """Class to extract IP addresses from plain text
         and gzipped plain text files."""
     def __init__(self, ignore_bogon=True):
+        """Initialize the class object and set defaults."""
         super().__init__(ignore_bogon)
-        self.ips = {}
 
     @staticmethod
     def is_gz_file(filepath):

diff --git a/libchickadee/parsers/xlsx.py b/libchickadee/parsers/xlsx.py
@@ -20,8 +20,8 @@
 class XLSXParser(ParserBase):
     """Class to extract IP addresses from XLSX workbooks."""
     def __init__(self, ignore_bogon=True):
+        """Initialize the class object and set defaults."""
         super().__init__(ignore_bogon)
-        self.ips = {}
 
     def parse_file(self, file_entry, is_stream=False):
         """Parse xlsx contents. Must be a path to an existing XLSX workbook.

diff --git a/libchickadee/resolvers/__init__.py b/libchickadee/resolvers/__init__.py
@@ -32,6 +32,7 @@ class ResolverBase:
         (ResolverBase)
     """
     def __init__(self):
+        """Initialize class object and set defaults."""
         self.uri = None
         self.lang = 'en'
         self.supported_langs = []
@@ -83,6 +84,15 @@ def query(self, data):
 
     @staticmethod
     def defang_ioc(ioc):
+        """Modify the display of IOCs to limit automated hyperlinking or access of unsafe resources
+
+        Args:
+            ioc (str): Content to scan for values to defang
+
+        Returns:
+            (str): Defanged value.
+
+        """
         return ioc.replace(".", "[.]")
 
     @staticmethod
@@ -177,6 +187,15 @@ def write_json(outfile, data, headers=None, lines=False):
 
     @staticmethod
     def normalize_data_headers(data, headers):
+        """Filter content from response that is not requested in output
+
+        Args:
+            data (list): Response from API
+            headers (list): List of user-provided headers to return
+
+        Returns:
+            (dict): Updated API response with limited headers
+        """
         # Only include fields in headers
         # Include headers with no value if not present in original
         selected_data = []
@@ -193,20 +212,44 @@ def normalize_data_headers(data, headers):
 
     @staticmethod
     def flatten_objects(data, headers):
+        """Flatten complex fields into simple columns for CSV usage
+
+        Args:
+            data (list): List of dictionaries to write to file
+            headers (list): Header row to use.
+
+        Returns:
+            (tuple): A ``(rows, headers)`` pair: the list of rows to write, followed by the list of headers to use.
+        """
         rows_to_write = []
         for raw_row in data:
             row = raw_row.copy()
             # Convert lists in to CSV friendly format
             for header in headers:
-                if isinstance(raw_row.get(header, None), list):
-                    # Converts list of simple values (str, int, float, bool) to pipe delimited string
-                    row[header] = " | ".join(raw_row[header])
-                elif isinstance(raw_row.get(header, None), dict):
-                    # For each object in a dictionary, add a new header and append to
-                    for key, value in raw_row[header].items():
-                        new_header = '{}.{}'.format(header, key)
-                        if new_header not in headers:
-                            headers.append(new_header)
-                        row[new_header] = value
+                ResolverBase._process_header(header, headers, raw_row, row)
             rows_to_write.append(row)
         return rows_to_write, headers
+
+    @staticmethod
+    def _process_header(header, headers, raw_row, row):
+        """Convert a list value into a pipe-delimited string, or flatten a nested dictionary with dot notation
+
+        Args:
+            header (str): Field name
+            headers (list): List of all fields
+            raw_row (dict): Original row
+            row (dict): Updated row
+
+        Returns:
+            None. Updates the `row` dictionary provided and appends any new dotted headers to `headers`
+        """
+        if isinstance(raw_row.get(header, None), list):
+            # Joins a list of str values into a pipe-delimited string (str.join rejects non-str items)
+            row[header] = " | ".join(raw_row[header])
+        elif isinstance(raw_row.get(header, None), dict):
+            # For each key in the dictionary, add a dotted header (if new) and store the value in the row
+            for key, value in raw_row[header].items():
+                new_header = '{}.{}'.format(header, key)
+                if new_header not in headers:
+                    headers.append(new_header)
+                row[new_header] = value