Skip to content

Commit

Permalink
Merge pull request #440 from Yelp/feature/support-scans-from-differen…
Browse files Browse the repository at this point in the history
…t-directories

feature: adding ability to scan different directories
  • Loading branch information
domanchi authored Mar 31, 2021
2 parents d40e4db + 07bfd4c commit c969292
Show file tree
Hide file tree
Showing 10 changed files with 105 additions and 20 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ Create a baseline of potential secrets currently found in your git repository.
$ detect-secrets scan > .secrets.baseline
```

or, to run it from a different directory:

```bash
$ detect-secrets -C /path/to/directory scan > /path/to/directory/.secrets.baseline
```

**Scanning non-git tracked files:**

```bash
Expand Down
10 changes: 7 additions & 3 deletions detect_secrets/core/baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,15 @@
from .secrets_collection import SecretsCollection


def create(*paths: str, should_scan_all_files: bool = False) -> SecretsCollection:
def create(*paths: str, should_scan_all_files: bool = False, root: str = '') -> SecretsCollection:
"""Scans all the files recursively in path to initialize a baseline."""
secrets = SecretsCollection()
secrets = SecretsCollection(root=root)

for filename in get_files_to_scan(*paths, should_scan_all_files=should_scan_all_files):
for filename in get_files_to_scan(
*paths,
should_scan_all_files=should_scan_all_files,
root=root,
):
secrets.scan_file(filename)

return secrets
Expand Down
19 changes: 14 additions & 5 deletions detect_secrets/core/scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,16 @@
from ..util import git
from ..util.code_snippet import get_code_snippet
from ..util.inject import call_function_with_arguments
from ..util.path import get_relative_path_if_in_cwd
from ..util.path import get_relative_path
from .log import log
from .plugins import Plugin
from .potential_secret import PotentialSecret


def get_files_to_scan(
*paths: str,
should_scan_all_files: bool = False
should_scan_all_files: bool = False,
root: str = '',
) -> Generator[str, None, None]:
"""
If we specify specific files, we should be able to scan them. This abides by the
Expand All @@ -49,7 +50,12 @@ def get_files_to_scan(
the scan for all files.
See test cases for more details.
:param root: if not specified, will assume current repository as root.
"""
if root:
root = os.path.realpath(root)

# First, we determine the appropriate filtering mode to be used.
# If this is True, then it will consider everything to be valid.
# Otherwise, it will only list the files that are valid.
Expand All @@ -62,7 +68,7 @@ def get_files_to_scan(

if not should_scan_all_files:
try:
valid_paths = git.get_tracked_files(git.get_root_directory())
valid_paths = git.get_tracked_files(git.get_root_directory(root))
except subprocess.CalledProcessError:
log.warning('Did not detect git repository. Try scanning all files instead.')
valid_paths = False
Expand All @@ -77,14 +83,17 @@ def get_files_to_scan(

for path in paths:
iterator = (
cast(List[Tuple], [(os.getcwd(), None, [path])])
cast(List[Tuple], [(root or os.getcwd(), None, [path])])
if os.path.isfile(path)
else os.walk(path)
)

for path_root, _, filenames in iterator:
for filename in filenames:
relative_path = get_relative_path_if_in_cwd(os.path.join(path_root, filename))
relative_path = get_relative_path(
root=root or os.getcwd(),
path=os.path.join(path_root, filename),
)
if not relative_path:
# e.g. symbolic links may be pointing outside the root directory
continue
Expand Down
11 changes: 9 additions & 2 deletions detect_secrets/core/secrets_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,15 @@ def __iter__(self) -> Generator:


class SecretsCollection:
def __init__(self) -> None:
def __init__(self, root: str = '') -> None:
"""
:param root: if specified, will scan as if the root was the value provided,
rather than the current working directory. We still store results as if
relative to root, since we're running as if it was in a different directory,
rather than scanning a different directory.
"""
self.data: Dict[str, Set[PotentialSecret]] = defaultdict(set)
self.root = root

@classmethod
def load_from_baseline(cls, baseline: Dict[str, Any]) -> 'SecretsCollection':
Expand All @@ -39,7 +46,7 @@ def files(self) -> Set[str]:
return set(self.data.keys())

def scan_file(self, filename: str) -> None:
for secret in scan.scan_file(filename):
for secret in scan.scan_file(os.path.join(self.root, filename)):
self[secret.filename].add(secret)

def scan_diff(self, diff: str) -> None:
Expand Down
24 changes: 24 additions & 0 deletions detect_secrets/core/usage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from . import filters
from . import plugins
from . import scan
from ...settings import get_settings
from .common import initialize_plugin_settings
from detect_secrets.__version__ import VERSION

Expand All @@ -35,6 +36,17 @@ def add_default_options(self) -> 'ParserBuilder':
version=VERSION,
help='Display version information.',
)
self._parser.add_argument(
'-C',
metavar='<path>',
dest='custom_root',
nargs=1,
default=[''],
help=(
'Run as if detect-secrets was started in <path>, rather than in the current '
'working directory.'
),
)

return self

Expand Down Expand Up @@ -137,6 +149,18 @@ def parse_args(self, argv: Optional[List[str]] = None) -> argparse.Namespace:
print(f'error: {str(e)}', file=sys.stderr)
sys.exit(1)

args.custom_root = args.custom_root[0]
if args.custom_root:
# This filter assumes current working directory, which will fail if we're running
# from a different directory.
# TODO: Maybe adjust this so that it is directory agnostic?
get_settings().disable_filters('detect_secrets.filters.common.is_invalid_file')

# Abide by the Principle of Least Surprise, and have the default value be the
# custom root directory itself.
if args.path == ['.']:
args.path = [args.custom_root]

return args


Expand Down
2 changes: 1 addition & 1 deletion detect_secrets/core/usage/scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def _add_initialize_baseline_options(parser: argparse.ArgumentParser) -> None:
parser.add_argument(
'path',
nargs='*',
default='.',
default=['.'],
help=(
'Scans the entire codebase and outputs a snapshot of '
'currently identified secrets.'
Expand Down
14 changes: 11 additions & 3 deletions detect_secrets/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,23 @@ def handle_scan_action(args: argparse.Namespace) -> None:
return

if args.only_allowlisted:
secrets = SecretsCollection()
for filename in get_files_to_scan(*args.path, should_scan_all_files=args.all_files):
secrets = SecretsCollection(root=args.custom_root)
for filename in get_files_to_scan(
*args.path,
should_scan_all_files=args.all_files,
root=args.custom_root,
):
for secret in scan_for_allowlisted_secrets_in_file(filename):
secrets[secret.filename].add(secret)

print(json.dumps(baseline.format_for_output(secrets), indent=2))
return

secrets = baseline.create(*args.path, should_scan_all_files=args.all_files)
secrets = baseline.create(
*args.path,
should_scan_all_files=args.all_files,
root=args.custom_root,
)
if args.baseline is not None:
# The pre-commit hook's baseline upgrade is to trim the supplied baseline for non-existent
# secrets, and to upgrade the format to the latest version. This is because the pre-commit
Expand Down
15 changes: 9 additions & 6 deletions detect_secrets/util/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,19 @@
from typing import Set

from ..core.log import log
from .path import get_relative_path_if_in_cwd
from .path import get_relative_path


def get_root_directory() -> str:
def get_root_directory(path: str = '') -> str:
"""
:raises: CalledProcessError
"""
return subprocess.check_output(
'git rev-parse --show-toplevel'.split(),
).decode('utf-8').strip()
command = ['git']
if path:
command.extend(['-C', path])

command.extend(['rev-parse', '--show-toplevel'])
return subprocess.check_output(command).decode('utf-8').strip()


def get_tracked_files(root: str) -> Set[str]:
Expand All @@ -33,7 +36,7 @@ def get_tracked_files(root: str) -> Set[str]:
)

for filename in files.decode('utf-8').splitlines():
path = get_relative_path_if_in_cwd(os.path.join(root, filename))
path = get_relative_path(root, os.path.join(root, filename))
if path:
output.add(path)

Expand Down
8 changes: 8 additions & 0 deletions detect_secrets/util/path.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
import os
from pathlib import Path
from typing import Optional


def get_relative_path(root: str, path: str) -> Optional[str]:
    """
    Express ``path`` relative to ``root``.

    When ``root`` is the current working directory, this defers to
    ``get_relative_path_if_in_cwd``. Otherwise, both arguments are resolved
    with ``os.path.realpath`` and the result is computed structurally rather
    than by slicing off ``len(root)`` characters, so that:

    - a trailing separator on ``root`` does not eat the first character of
      the result;
    - a sibling directory that merely shares a string prefix with ``root``
      (e.g. ``/a/root-old`` vs ``/a/root``) is not treated as being inside it;
    - a path that resolves outside ``root`` (e.g. through a symbolic link)
      yields ``None`` instead of an arbitrary string fragment.

    :param root: directory the result should be relative to.
    :param path: path of the file to convert.
    :returns: the path relative to ``root``, or ``None`` if ``path`` does not
        resolve to a location strictly inside ``root``.
    """
    if Path(os.getcwd()) == Path(root):
        return get_relative_path_if_in_cwd(path)

    real_root = os.path.realpath(root)
    real_path = os.path.realpath(path)

    try:
        is_inside = os.path.commonpath([real_root, real_path]) == real_root
    except ValueError:
        # commonpath refuses to compare paths on different drives (Windows);
        # such a path cannot be inside root.
        return None

    # The root itself has no meaningful relative file path.
    if not is_inside or real_path == real_root:
        return None

    return os.path.relpath(real_path, real_root)


def get_relative_path_if_in_cwd(path: str) -> Optional[str]:
filepath = os.path.realpath(path)[len(os.getcwd() + '/'):]
if os.path.isfile(filepath):
Expand Down
16 changes: 16 additions & 0 deletions tests/main_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import json
import os
import subprocess
import tempfile
from contextlib import contextmanager
from contextlib import redirect_stdout
Expand Down Expand Up @@ -55,6 +57,20 @@ def test_saves_to_baseline():
]
assert not printer.message

@staticmethod
def test_works_from_different_directory():
    """Scanning with ``-C <dir>`` finds secrets in a repository other than the cwd."""
    with tempfile.TemporaryDirectory() as d:
        # Use check_output (as for `git add` below) so that a failing
        # `git init` aborts the test immediately instead of surfacing later
        # as a confusing scan failure; it also captures git's stdout.
        subprocess.check_output(['git', '-C', d, 'init'])
        with open(os.path.join(d, 'credentials.yaml'), 'w') as f:
            f.write('secret: asxeqFLAGMEfxuwma!')
        # Only git-tracked files are scanned, so the file must be staged.
        subprocess.check_output(['git', '-C', d, 'add', 'credentials.yaml'])

        with mock_printer(main_module) as printer:
            assert main_module.main(['-C', d, 'scan']) == 0

        results = json.loads(printer.message)['results']
        assert results


class TestSlimScan:
@staticmethod
Expand Down

0 comments on commit c969292

Please sign in to comment.