Skip to content

Commit

Permalink
Merge pull request #96 from Yelp/autoupdate_baseline
Browse files Browse the repository at this point in the history
🎉 [pre-commit-hook] Automatically update the baseline
  • Loading branch information
KevinHock committed Nov 26, 2018
2 parents 7e8d909 + 58d035a commit c3eae1d
Show file tree
Hide file tree
Showing 6 changed files with 103 additions and 117 deletions.
36 changes: 22 additions & 14 deletions detect_secrets/core/baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,39 +8,47 @@
from detect_secrets.core.secrets_collection import SecretsCollection


def initialize(plugins, exclude_regex=None, rootdir='.', scan_all_files=False):
"""Scans the entire codebase for high entropy strings, and returns a
def initialize(
plugins,
exclude_regex=None,
path='.',
scan_all_files=False,
):
"""Scans the entire codebase for secrets, and returns a
SecretsCollection object.
:type plugins: tuple of detect_secrets.plugins.base.BasePlugin
:param plugins: rules to initialize the SecretsCollection with.
:type exclude_regex: str|None
:type rootdir: str
:type path: str
:type scan_all_files: bool
:rtype: SecretsCollection
"""
output = SecretsCollection(plugins, exclude_regex)

if os.path.isfile(rootdir):
if os.path.isfile(path):
# This option allows for much easier adhoc usage.
git_files = [rootdir]
files_to_scan = [path]
elif scan_all_files:
git_files = _get_files_recursively(rootdir)
files_to_scan = _get_files_recursively(path)
else:
git_files = _get_git_tracked_files(rootdir)
files_to_scan = _get_git_tracked_files(path)

if not git_files:
if not files_to_scan:
return output

if exclude_regex:
regex = re.compile(exclude_regex, re.IGNORECASE)
git_files = filter(
lambda x: not regex.search(x),
git_files,
files_to_scan = filter(
lambda file: (
not regex.search(file)
),
files_to_scan,
)

for file in git_files:
for file in files_to_scan:
output.scan_file(file)

return output
Expand Down Expand Up @@ -86,7 +94,7 @@ def get_secrets_not_in_baseline(results, baseline):
return new_secrets


def update_baseline_with_removed_secrets(results, baseline, filelist):
def trim_baseline_of_removed_secrets(results, baseline, filelist):
"""
NOTE: filelist is not a comprehensive list of all files in the repo
(because we can't be sure whether --all-files is passed in as a
Expand Down Expand Up @@ -200,7 +208,7 @@ def merge_results(old_results, new_results):
continue

old_secret = old_secrets_mapping[new_secret['hashed_secret']]
# Only propogate 'is_secret' if it's not already there
# Only propagate 'is_secret' if it's not already there
if 'is_secret' in old_secret and 'is_secret' not in new_secret:
new_secret['is_secret'] = old_secret['is_secret']

Expand Down
7 changes: 5 additions & 2 deletions detect_secrets/core/secrets_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ def _load_baseline_from_dict(cls, data):
'exclude_regex',
'plugins_used',
'results',
'version',
)):
raise IOError

Expand Down Expand Up @@ -95,7 +94,11 @@ def _load_baseline_from_dict(cls, data):
secret.secret_hash = item['hashed_secret']
result.data[filename][secret] = secret

result.version = data['version']
result.version = (
data['version']
if 'version' in data
else '0.0.0'
)

return result

Expand Down
8 changes: 6 additions & 2 deletions detect_secrets/core/usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def add_console_use_arguments(self):
dest='action',
)

for action_parser in [ScanOptions, AuditOptions]:
for action_parser in (ScanOptions, AuditOptions):
action_parser(subparser).add_arguments()

return self
Expand Down Expand Up @@ -62,7 +62,11 @@ def _add_verbosity_argument(self):
return self

def _add_filenames_argument(self):
self.parser.add_argument('filenames', nargs='*', help='Filenames to check')
self.parser.add_argument(
'filenames',
nargs='*',
help='Filenames to check',
)
return self

def _add_set_baseline_argument(self):
Expand Down
82 changes: 19 additions & 63 deletions detect_secrets/pre_commit_hook.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
from __future__ import absolute_import

import json
import subprocess
import sys
import textwrap

from detect_secrets import VERSION
from detect_secrets.core.baseline import format_baseline_for_output
from detect_secrets.core.baseline import get_secrets_not_in_baseline
from detect_secrets.core.baseline import update_baseline_with_removed_secrets
from detect_secrets.core.baseline import trim_baseline_of_removed_secrets
from detect_secrets.core.log import get_logger
from detect_secrets.core.secrets_collection import SecretsCollection
from detect_secrets.core.usage import ParserBuilder
Expand Down Expand Up @@ -36,7 +35,8 @@ def main(argv=None):
# Error logs handled within logic.
return 1

results = find_secrets_in_files(args)
plugins = initialize.from_parser_builder(args.plugins)
results = find_secrets_in_files(args, plugins)
if baseline_collection:
original_results = results
results = get_secrets_not_in_baseline(
Expand All @@ -52,12 +52,18 @@ def main(argv=None):
return 0

# Only attempt baseline modifications if we don't find any new secrets
successful_update = update_baseline_with_removed_secrets(
baseline_modified = trim_baseline_of_removed_secrets(
original_results,
baseline_collection,
args.filenames,
)
if successful_update:

if VERSION != baseline_collection.version:
baseline_collection.plugins = plugins
baseline_collection.version = VERSION
baseline_modified = True

if baseline_modified:
_write_to_baseline_file(
args.baseline[0],
baseline_collection.format_for_baseline_output(),
Expand Down Expand Up @@ -87,31 +93,13 @@ def get_baseline(baseline_filename):
if not baseline_filename:
return

raise_exception_if_baseline_file_is_not_up_to_date(baseline_filename)
raise_exception_if_baseline_file_is_unstaged(baseline_filename)

baseline_string = _get_baseline_string_from_file(baseline_filename)
baseline_version = json.loads(baseline_string).get('version')

try:
raise_exception_if_baseline_version_is_outdated(
baseline_version,
)
except ValueError:
log.error(
'The supplied baseline may be incompatible with the current\n'
'version of detect-secrets. Please recreate your baseline to\n'
'avoid potential mis-configurations.\n\n'
'$ detect-secrets scan --update %s\n\n'
'Current Version: %s\n'
'Baseline Version: %s',
return SecretsCollection.load_baseline_from_string(
_get_baseline_string_from_file(
baseline_filename,
VERSION,
baseline_version if baseline_version else '0.0.0',
)

raise

return SecretsCollection.load_baseline_from_string(baseline_string)
),
)


def _get_baseline_string_from_file(filename): # pragma: no cover
Expand All @@ -130,7 +118,7 @@ def _get_baseline_string_from_file(filename): # pragma: no cover
raise


def raise_exception_if_baseline_file_is_not_up_to_date(filename):
def raise_exception_if_baseline_file_is_unstaged(filename):
"""We want to make sure that if there are changes to the baseline
file, they will be included in the commit. This way, we can keep
our baselines up-to-date.
Expand Down Expand Up @@ -161,44 +149,12 @@ def raise_exception_if_baseline_file_is_not_up_to_date(filename):
raise ValueError


def raise_exception_if_baseline_version_is_outdated(version):
"""
Version changes may cause breaking changes with past baselines.
Due to this, we want to make sure that the version that the
baseline was created with is compatible with the current version
of the scanner.
We use semantic versioning, and check for bumps in the MINOR
version (a good compromise, so we can release patches for other
non-baseline-related issues, without having all our users
recreate their baselines again).
:type version: str|None
:param version: version of baseline
:raises: ValueError
"""
if not version:
# Baselines created before this change, so by definition,
# would be outdated.
raise ValueError

baseline_version = version.split('.')
current_version = VERSION.split('.')

if int(current_version[0]) > int(baseline_version[0]):
raise ValueError
elif current_version[0] == baseline_version[0] and \
int(current_version[1]) > int(baseline_version[1]):
raise ValueError


def find_secrets_in_files(args):
plugins = initialize.from_parser_builder(args.plugins)
def find_secrets_in_files(args, plugins):
collection = SecretsCollection(plugins)

for filename in args.filenames:
# Don't scan the baseline file
if filename == args.baseline[0]:
# Obviously, don't detect the baseline file
continue

collection.scan_file(filename)
Expand Down
24 changes: 12 additions & 12 deletions tests/core/baseline_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from detect_secrets.core.baseline import get_secrets_not_in_baseline
from detect_secrets.core.baseline import merge_baseline
from detect_secrets.core.baseline import merge_results
from detect_secrets.core.baseline import update_baseline_with_removed_secrets
from detect_secrets.core.baseline import trim_baseline_of_removed_secrets
from detect_secrets.core.potential_secret import PotentialSecret
from detect_secrets.plugins.high_entropy_strings import Base64HighEntropyString
from detect_secrets.plugins.high_entropy_strings import HexHighEntropyString
Expand All @@ -31,28 +31,28 @@ def setup(self):

def get_results(
self,
rootdir='./test_data/files',
path='./test_data/files',
exclude_regex=None,
scan_all_files=False,
):
return baseline.initialize(
self.plugins,
rootdir=rootdir,
path=path,
exclude_regex=exclude_regex,
scan_all_files=scan_all_files,
).json()

@pytest.mark.parametrize(
'rootdir',
'path',
[
'./test_data/files',
# Test relative paths
'test_data/../test_data/files/tmp/..',
],
)
def test_basic_usage(self, rootdir):
results = self.get_results(rootdir=rootdir)
def test_basic_usage(self, path):
results = self.get_results(path=path)

assert len(results.keys()) == 2
assert len(results['test_data/files/file_with_secrets.py']) == 1
Expand Down Expand Up @@ -82,7 +82,7 @@ def test_no_files_in_git_repo(self):
),
),
):
results = self.get_results(rootdir='will_be_mocked')
results = self.get_results(path='will_be_mocked')

assert not results

Expand All @@ -99,7 +99,7 @@ def test_single_non_tracked_git_file_should_work(self):
assert len(results['will_be_mocked']) == 1

def test_scan_all_files(self):
results = self.get_results(rootdir='test_data/files', scan_all_files=True)
results = self.get_results(path='test_data/files', scan_all_files=True)
assert len(results.keys()) == 2


Expand Down Expand Up @@ -229,7 +229,7 @@ def test_deleted_secret(self):
},
])

is_successful = update_baseline_with_removed_secrets(
is_successful = trim_baseline_of_removed_secrets(
new_findings,
baseline,
['filename'],
Expand All @@ -247,7 +247,7 @@ def test_deleted_secret_file(self):
},
])

is_successful = update_baseline_with_removed_secrets(
is_successful = trim_baseline_of_removed_secrets(
new_findings,
baseline,
[
Expand All @@ -272,7 +272,7 @@ def test_same_secret_new_location(self):
},
])

is_successful = update_baseline_with_removed_secrets(
is_successful = trim_baseline_of_removed_secrets(
new_findings,
baseline,
['filename'],
Expand Down Expand Up @@ -303,7 +303,7 @@ def test_no_baseline_modifications(self, results_dict, baseline_dict):
new_findings = secrets_collection_factory([results_dict])
baseline = secrets_collection_factory([baseline_dict])

assert not update_baseline_with_removed_secrets(
assert not trim_baseline_of_removed_secrets(
new_findings,
baseline,
['filename'],
Expand Down
Loading

0 comments on commit c3eae1d

Please sign in to comment.