Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature: adding multiprocessing support #441

Merged
merged 3 commits into from
Apr 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 15 additions & 8 deletions detect_secrets/core/baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import cast
from typing import Dict
from typing import List
from typing import Optional
from typing import Union

from . import upgrades
Expand All @@ -18,16 +19,22 @@
from .secrets_collection import SecretsCollection


def create(
    *paths: str,
    should_scan_all_files: bool = False,
    root: str = '',
    num_processors: Optional[int] = None,
) -> SecretsCollection:
    """Scans all the files recursively in path to initialize a baseline.

    :param paths: paths to scan; forwarded to get_files_to_scan.
    :param should_scan_all_files: forwarded unchanged to get_files_to_scan.
    :param root: root given to both the SecretsCollection and get_files_to_scan.
    :param num_processors: number of worker processes for the scan. A falsy
        value (None or 0) leaves the choice to SecretsCollection.scan_files.
    :returns: the collection populated by the scan.
    """
    secrets = SecretsCollection(root=root)

    # Scan exactly once, through the parallel entry point. scan_files already
    # treats a falsy num_processors as "use the default", so it can be passed
    # straight through without building a conditional kwargs dict.
    secrets.scan_files(
        *get_files_to_scan(*paths, should_scan_all_files=should_scan_all_files, root=root),
        num_processors=num_processors,
    )

    return secrets

Expand Down
23 changes: 23 additions & 0 deletions detect_secrets/core/secrets_collection.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import multiprocessing as mp
import os
from collections import defaultdict
from typing import Any
Expand Down Expand Up @@ -45,6 +46,23 @@ def load_from_baseline(cls, baseline: Dict[str, Any]) -> 'SecretsCollection':
def files(self) -> Set[str]:
return set(self.data.keys())

def scan_files(self, *filenames: str, num_processors: Optional[int] = None) -> None:
    """Just like scan_file, but optimized through parallel processing.

    :param filenames: files to scan; each is joined onto self.root before
        being handed to a worker.
    :param num_processors: size of the worker pool. A falsy value (None or 0)
        means multiprocessing.cpu_count(). The pool is never made larger
        than the number of files.
    """
    if not filenames:
        # Nothing to scan: don't pay for spawning a pool that would sit idle.
        return

    if len(filenames) == 1:
        # A pool is pure overhead for a single file; scan it in-process.
        self.scan_file(filenames[0])
        return

    if not num_processors:
        num_processors = mp.cpu_count()

    # Workers beyond the number of files could never receive any work.
    num_processors = min(num_processors, len(filenames))

    with mp.Pool(processes=num_processors) as p:
        for secrets in p.imap_unordered(
            _scan_file_and_serialize,
            [os.path.join(self.root, filename) for filename in filenames],
        ):
            for secret in secrets:
                # Workers were given root-joined paths, so results are keyed
                # relative to root again before being stored.
                self[os.path.relpath(secret.filename, self.root)].add(secret)

def scan_file(self, filename: str) -> None:
    """Scan one file (joined onto self.root) and store each result under ``filename``."""
    full_path = os.path.join(self.root, filename)
    for found in scan.scan_file(full_path):
        self[filename].add(found)
Expand Down Expand Up @@ -269,3 +287,8 @@ def __sub__(self, other: Any) -> 'SecretsCollection':
output[filename] = self[filename]

return output


def _scan_file_and_serialize(filename: str) -> List[PotentialSecret]:
    """Scan ``filename`` and return everything found as a list.

    Used for multiprocessing: this is a named module-level function because
    lambdas can't be serialized.
    """
    found = scan.scan_file(filename)
    return list(found)
14 changes: 14 additions & 0 deletions detect_secrets/core/usage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,18 @@ def add_default_options(self) -> 'ParserBuilder':
'working directory.'
),
)
self._parser.add_argument(
'-c',
'--cores',
dest='num_cores',
nargs=1,
type=int,
default=[None],
help=(
'Specify the number of cores to use for parallel processing. Defaults to '
'using the max cores on the current host.'
),
)

return self

Expand Down Expand Up @@ -161,6 +173,8 @@ def parse_args(self, argv: Optional[List[str]] = None) -> argparse.Namespace:
if args.path == ['.']:
args.path = [args.custom_root]

args.num_cores = args.num_cores[0]

return args


Expand Down
1 change: 1 addition & 0 deletions detect_secrets/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def handle_scan_action(args: argparse.Namespace) -> None:
*args.path,
should_scan_all_files=args.all_files,
root=args.custom_root,
num_processors=args.num_cores,
)
if args.baseline is not None:
# The pre-commit hook's baseline upgrade is to trim the supplied baseline for non-existent
Expand Down