From adc835d50a5ea2fc5f37593ae4b4aa9a3501affc Mon Sep 17 00:00:00 2001 From: Pablo Santiago Date: Tue, 22 Dec 2020 14:01:29 +0100 Subject: [PATCH 01/16] Reporting feature --- detect_secrets/audit/__init__.py | 1 + detect_secrets/audit/report.py | 137 +++++++++++++++++++++++++++++ detect_secrets/core/usage/audit.py | 29 ++++++ detect_secrets/main.py | 7 ++ 4 files changed, 174 insertions(+) create mode 100644 detect_secrets/audit/report.py diff --git a/detect_secrets/audit/__init__.py b/detect_secrets/audit/__init__.py index a3f9e1c9b..9e1dbdbb4 100644 --- a/detect_secrets/audit/__init__.py +++ b/detect_secrets/audit/__init__.py @@ -1,3 +1,4 @@ from . import analytics # noqa: F401 +from . import report # noqa: F401 from .audit import audit_baseline # noqa: F401 from .compare import compare_baselines # noqa: F401 diff --git a/detect_secrets/audit/report.py b/detect_secrets/audit/report.py new file mode 100644 index 000000000..358886845 --- /dev/null +++ b/detect_secrets/audit/report.py @@ -0,0 +1,137 @@ +import hashlib +import codecs +import json +from enum import Enum + +from .io import print_message +from ..core.plugins.util import Plugin, get_mapping_from_secret_type_to_class +from ..core.scan import _get_lines_from_file, _scan_line +from ..core.potential_secret import PotentialSecret +from ..plugins.base import BasePlugin + + +class SecretClass(Enum): + TRUE_POSITIVE = 1 + FALSE_POSITIVE = 2 + UNKNOWN = 3 + + def from_boolean(is_secret: bool) -> Enum: + if is_secret == None: + return SecretClass.UNKNOWN + elif is_secret: + return SecretClass.TRUE_POSITIVE + else: + return SecretClass.FALSE_POSITIVE + + def to_string(self) -> str: + return self.name + + def get_prioritary(self, secret_class: str) -> Enum: + try: + to_compare = SecretClass[secret_class] + except: + return self + if to_compare.value < self.value: + return secret_class + else: + return self + + +class SecretClassToPrint(Enum): + REAL_SECRET = 1 + FALSE_POSITIVE = 2 + + def from_class(secret_class: SecretClass) -> Enum: + if secret_class in [SecretClass.UNKNOWN, SecretClass.TRUE_POSITIVE]: + return SecretClassToPrint.REAL_SECRET + else: + return SecretClassToPrint.FALSE_POSITIVE + + +def print_report( + baseline_file: str, + class_to_print: SecretClassToPrint = None +) -> None: + baseline = json.load(codecs.open(baseline_file, encoding='utf-8')) + details = get_secrets_details_from_baseline(baseline) + plugins = get_mapping_from_secret_type_to_class() + secrets = {} + for filename, secret_type, secret_hash, is_secret in details: + secret_class = SecretClass.from_boolean(is_secret) + if class_to_print != None and SecretClassToPrint.from_class(secret_class) != class_to_print: + continue + try: + detections = get_potential_secrets(filename, plugins[secret_type](), secret_hash) + except: + continue + identifier = hashlib.sha512((secret_hash + filename).encode('utf-8')).hexdigest() + for detection in detections: + if identifier in secrets: + secrets[identifier]['lines'][detection.line_number] = get_line_content(filename, detection.line_number) + if not secret_type in secrets[identifier]['types']: + secrets[identifier]['types'].append(secret_type) + secrets[identifier]['class'] = secret_class.get_prioritary(secrets[identifier]['class']).to_string() + else: + finding = {} + finding['secret'] = detection.secret_value + finding['filename'] = filename + finding['lines'] = {} + finding['lines'][detection.line_number] = get_line_content(filename, detection.line_number) + finding['types'] = [secret_type] + finding['class'] = secret_class.to_string() + secrets[identifier] = finding + + output = [] + for identifier in secrets: + output.append(secrets[identifier]) + + print_message(json.dumps(output, indent=4, sort_keys=True)) + + +def get_secrets_details_from_baseline( + baseline: str +) -> [(str, str, str, bool)]: + """ + :returns: Details of each secret present in the baseline file. + """ + for filename, secrets in baseline['results'].items(): + for secret in secrets: + yield filename, secret['type'], secret['hashed_secret'], secret['is_secret'] + + +def get_secret_class( + is_secret: bool +) -> str: + """ + :returns: Secret class as string. + """ + return 'Unknown' if is_secret == None else 'True positive' if is_secret else 'False positive' + + +def get_potential_secrets( + filename: str, + plugin: Plugin, + secret_to_find: str +) -> [PotentialSecret]: + """ + :returns: List of PotentialSecrets detected by a specific plugin in a file. + """ + for lines in _get_lines_from_file(filename): + for line_number, line in list(enumerate(lines, 1)): + secrets = _scan_line(plugin, filename, line, line_number) + for secret in secrets: + if secret.secret_hash == secret_to_find: + yield secret + + +def get_line_content( + filename: str, + line_number: int +) -> str: + """ + :returns: Line content from filename by line number. + """ + content = codecs.open(filename, encoding='utf-8').read() + if not content: + return None + return content.splitlines()[line_number - 1] \ No newline at end of file diff --git a/detect_secrets/core/usage/audit.py b/detect_secrets/core/usage/audit.py index 453c17a8d..8f55fab41 100644 --- a/detect_secrets/core/usage/audit.py +++ b/detect_secrets/core/usage/audit.py @@ -21,6 +21,7 @@ def add_audit_action(parent: argparse._SubParsersAction) -> argparse.ArgumentPar ) _add_mode_parser(parser) + _add_report_parser(parser) _add_statistics_module(parser) return parser @@ -46,6 +47,34 @@ def _add_mode_parser(parser: argparse.ArgumentParser) -> None: ) +def _add_report_parser(parser: argparse.ArgumentParser) -> None: + parser.add_argument( + '--report', + action='store_true', + help=( + 'Displays a report with the secrets detected' + ) + ) + + parser.add_argument( + '--only-real', + action='store_true', + help=( + 'Only includes real secrets in the report' + ) + ) + + parser.add_argument( + '--only-false', + action='store_true', + help=( + 'Only includes false positives in the report' + ) + ) + + + + def _add_statistics_module(parent: argparse.ArgumentParser) -> None: parser = parent.add_argument_group( title='analytics', diff --git a/detect_secrets/main.py b/detect_secrets/main.py index 7f055d6b5..00f24819c 100644 --- a/detect_secrets/main.py +++ b/detect_secrets/main.py @@ -120,6 +120,13 @@ def handle_audit_action(args: argparse.Namespace) -> None: print(json.dumps(stats.json(), indent=2)) else: print(str(stats)) + elif args.report: + class_to_print = None + if args.only_real: + class_to_print = audit.report.SecretClassToPrint.REAL_SECRET + elif args.only_false: + class_to_print = audit.report.SecretClassToPrint.FALSE_POSITIVE + audit.report.print_report(args.filename[0], class_to_print) else: # Starts interactive session. if args.diff: From f895819443592f559f58b6fdd264511ab1ed61b5 Mon Sep 17 00:00:00 2001 From: Pablo Santiago Date: Wed, 23 Dec 2020 11:08:27 +0100 Subject: [PATCH 02/16] Reporting feature: first corrections --- detect_secrets/audit/report.py | 108 +++++++++++------------------ detect_secrets/core/usage/audit.py | 7 +- detect_secrets/main.py | 2 +- 3 files changed, 47 insertions(+), 70 deletions(-) diff --git a/detect_secrets/audit/report.py b/detect_secrets/audit/report.py index 358886845..f4b328286 100644 --- a/detect_secrets/audit/report.py +++ b/detect_secrets/audit/report.py @@ -3,109 +3,85 @@ import json from enum import Enum -from .io import print_message +from .common import get_baseline_from_file from ..core.plugins.util import Plugin, get_mapping_from_secret_type_to_class from ..core.scan import _get_lines_from_file, _scan_line from ..core.potential_secret import PotentialSecret from ..plugins.base import BasePlugin - - -class SecretClass(Enum): - TRUE_POSITIVE = 1 - FALSE_POSITIVE = 2 - UNKNOWN = 3 - - def from_boolean(is_secret: bool) -> Enum: - if is_secret == None: - return SecretClass.UNKNOWN - elif is_secret: - return SecretClass.TRUE_POSITIVE - else: - return SecretClass.FALSE_POSITIVE - - def to_string(self) -> str: - return self.name - - def get_prioritary(self, secret_class: str) -> Enum: - try: - to_compare = SecretClass[secret_class] - except: - return self - if to_compare.value < self.value: - return secret_class - else: - return self +from ..constants import VerifiedResult class SecretClassToPrint(Enum): REAL_SECRET = 1 FALSE_POSITIVE = 2 - def from_class(secret_class: SecretClass) -> Enum: - if secret_class in [SecretClass.UNKNOWN, SecretClass.TRUE_POSITIVE]: + def from_class(secret_class: VerifiedResult) -> Enum: + if secret_class in [VerifiedResult.UNVERIFIED, VerifiedResult.VERIFIED_TRUE]: return SecretClassToPrint.REAL_SECRET else: return SecretClassToPrint.FALSE_POSITIVE -def print_report( +def generate_report( baseline_file: str, class_to_print: SecretClassToPrint = None ) -> None: - baseline = json.load(codecs.open(baseline_file, encoding='utf-8')) - details = get_secrets_details_from_baseline(baseline) plugins = get_mapping_from_secret_type_to_class() secrets = {} - for filename, secret_type, secret_hash, is_secret in details: - secret_class = SecretClass.from_boolean(is_secret) - if class_to_print != None and SecretClassToPrint.from_class(secret_class) != class_to_print: + for filename, secret in get_baseline_from_file(baseline_file): + verified_result = get_verified_result_from_boolean(secret.is_secret) + if class_to_print != None and SecretClassToPrint.from_class(verified_result) != class_to_print: continue try: - detections = get_potential_secrets(filename, plugins[secret_type](), secret_hash) + detections = get_potential_secrets(filename, plugins[secret.type](), secret.secret_hash) except: continue - identifier = hashlib.sha512((secret_hash + filename).encode('utf-8')).hexdigest() + identifier = hashlib.sha512((secret.secret_hash + filename).encode('utf-8')).hexdigest() for detection in detections: if identifier in secrets: secrets[identifier]['lines'][detection.line_number] = get_line_content(filename, detection.line_number) - if not secret_type in secrets[identifier]['types']: - secrets[identifier]['types'].append(secret_type) - secrets[identifier]['class'] = secret_class.get_prioritary(secrets[identifier]['class']).to_string() + if not secret.type in secrets[identifier]['types']: + secrets[identifier]['types'].append(secret.type) + secrets[identifier]['category'] = get_prioritary_verified_result(verified_result, VerifiedResult[secrets[identifier]['category']]).name else: - finding = {} - finding['secret'] = detection.secret_value - finding['filename'] = filename - finding['lines'] = {} - finding['lines'][detection.line_number] = get_line_content(filename, detection.line_number) - finding['types'] = [secret_type] - finding['class'] = secret_class.to_string() - secrets[identifier] = finding + secrets[identifier] = { + 'secrets': detection.secret_value, + 'filename': filename, + 'lines': { + detection.line_number: get_line_content(filename, detection.line_number) + }, + 'types': [ + secret.type + ], + 'category': verified_result.name + } output = [] for identifier in secrets: output.append(secrets[identifier]) - print_message(json.dumps(output, indent=4, sort_keys=True)) + return output - -def get_secrets_details_from_baseline( - baseline: str -) -> [(str, str, str, bool)]: - """ - :returns: Details of each secret present in the baseline file. - """ - for filename, secrets in baseline['results'].items(): - for secret in secrets: - yield filename, secret['type'], secret['hashed_secret'], secret['is_secret'] +def get_prioritary_verified_result( + result1: VerifiedResult, + result2: VerifiedResult +) -> VerifiedResult: + if result1.value > result2.value: + return result1 + else: + return result2 -def get_secret_class( + +def get_verified_result_from_boolean( is_secret: bool -) -> str: - """ - :returns: Secret class as string. - """ - return 'Unknown' if is_secret == None else 'True positive' if is_secret else 'False positive' +) -> VerifiedResult: + if is_secret == None: + return VerifiedResult.UNVERIFIED + elif is_secret: + return VerifiedResult.VERIFIED_TRUE + else: + return VerifiedResult.VERIFIED_FALSE def get_potential_secrets( diff --git a/detect_secrets/core/usage/audit.py b/detect_secrets/core/usage/audit.py index 8f55fab41..b9a7129b4 100644 --- a/detect_secrets/core/usage/audit.py +++ b/detect_secrets/core/usage/audit.py @@ -55,8 +55,9 @@ def _add_report_parser(parser: argparse.ArgumentParser) -> None: 'Displays a report with the secrets detected' ) ) - - parser.add_argument( + + report_parser = parser.add_mutually_exclusive_group() + report_parser.add_argument( '--only-real', action='store_true', help=( @@ -64,7 +65,7 @@ def _add_report_parser(parser: argparse.ArgumentParser) -> None: ) ) - parser.add_argument( + report_parser.add_argument( '--only-false', action='store_true', help=( diff --git a/detect_secrets/main.py b/detect_secrets/main.py index 00f24819c..2845b2f2d 100644 --- a/detect_secrets/main.py +++ b/detect_secrets/main.py @@ -126,7 +126,7 @@ def handle_audit_action(args: argparse.Namespace) -> None: class_to_print = audit.report.SecretClassToPrint.REAL_SECRET elif args.only_false: class_to_print = audit.report.SecretClassToPrint.FALSE_POSITIVE - audit.report.print_report(args.filename[0], class_to_print) + print(json.dumps(audit.report.generate_report(args.filename[0], class_to_print), indent=4, sort_keys=True)) else: # Starts interactive session. if args.diff: From c9633deb89c3f6a9556c883304bc4dadd270ee52 Mon Sep 17 00:00:00 2001 From: Pablo Santiago Date: Fri, 8 Jan 2021 14:29:20 +0100 Subject: [PATCH 03/16] Reporting feature: first test version --- detect_secrets/audit/report.py | 53 +++++++------ detect_secrets/core/usage/audit.py | 10 +-- detect_secrets/main.py | 8 +- tests/audit/report_test.py | 122 +++++++++++++++++++++++++++++ 4 files changed, 161 insertions(+), 32 deletions(-) create mode 100644 tests/audit/report_test.py diff --git a/detect_secrets/audit/report.py b/detect_secrets/audit/report.py index f4b328286..ece9bddd9 100644 --- a/detect_secrets/audit/report.py +++ b/detect_secrets/audit/report.py @@ -1,14 +1,14 @@ -import hashlib import codecs -import json +import hashlib from enum import Enum -from .common import get_baseline_from_file -from ..core.plugins.util import Plugin, get_mapping_from_secret_type_to_class -from ..core.scan import _get_lines_from_file, _scan_line -from ..core.potential_secret import PotentialSecret -from ..plugins.base import BasePlugin from ..constants import VerifiedResult +from ..core.plugins.util import get_mapping_from_secret_type_to_class +from ..core.plugins.util import Plugin +from ..core.potential_secret import PotentialSecret +from ..core.scan import _get_lines_from_file +from ..core.scan import _scan_line +from .common import get_baseline_from_file class SecretClassToPrint(Enum): @@ -19,41 +19,44 @@ def from_class(secret_class: VerifiedResult) -> Enum: if secret_class in [VerifiedResult.UNVERIFIED, VerifiedResult.VERIFIED_TRUE]: return SecretClassToPrint.REAL_SECRET else: - return SecretClassToPrint.FALSE_POSITIVE + return SecretClassToPrint.FALSE_POSITIVE def generate_report( baseline_file: str, - class_to_print: SecretClassToPrint = None + class_to_print: SecretClassToPrint = None, ) -> None: plugins = get_mapping_from_secret_type_to_class() secrets = {} for filename, secret in get_baseline_from_file(baseline_file): verified_result = get_verified_result_from_boolean(secret.is_secret) - if class_to_print != None and SecretClassToPrint.from_class(verified_result) != class_to_print: + if class_to_print is not None and SecretClassToPrint.from_class(verified_result) != class_to_print: # noqa: E501 continue try: detections = get_potential_secrets(filename, plugins[secret.type](), secret.secret_hash) - except: + except Exception: continue identifier = hashlib.sha512((secret.secret_hash + filename).encode('utf-8')).hexdigest() for detection in detections: if identifier in secrets: - secrets[identifier]['lines'][detection.line_number] = get_line_content(filename, detection.line_number) - if not secret.type in secrets[identifier]['types']: + secrets[identifier]['lines'][detection.line_number] = get_line_content(filename, detection.line_number) # noqa: E501 + if secret.type not in secrets[identifier]['types']: secrets[identifier]['types'].append(secret.type) - secrets[identifier]['category'] = get_prioritary_verified_result(verified_result, VerifiedResult[secrets[identifier]['category']]).name + secrets[identifier]['category'] = get_prioritary_verified_result( + verified_result, + VerifiedResult[secrets[identifier]['category']], + ).name else: secrets[identifier] = { 'secrets': detection.secret_value, 'filename': filename, 'lines': { - detection.line_number: get_line_content(filename, detection.line_number) + detection.line_number: get_line_content(filename, detection.line_number), }, 'types': [ - secret.type + secret.type, ], - 'category': verified_result.name + 'category': verified_result.name, } output = [] @@ -64,19 +67,19 @@ def generate_report( def get_prioritary_verified_result( - result1: VerifiedResult, - result2: VerifiedResult + result1: VerifiedResult, + result2: VerifiedResult, ) -> VerifiedResult: if result1.value > result2.value: return result1 - else: + else: return result2 def get_verified_result_from_boolean( - is_secret: bool + is_secret: bool, ) -> VerifiedResult: - if is_secret == None: + if is_secret is None: return VerifiedResult.UNVERIFIED elif is_secret: return VerifiedResult.VERIFIED_TRUE @@ -87,7 +90,7 @@ def get_verified_result_from_boolean( def get_potential_secrets( filename: str, plugin: Plugin, - secret_to_find: str + secret_to_find: str, ) -> [PotentialSecret]: """ :returns: List of PotentialSecrets detected by a specific plugin in a file. @@ -102,7 +105,7 @@ def get_potential_secrets( def get_line_content( filename: str, - line_number: int + line_number: int, ) -> str: """ :returns: Line content from filename by line number. @@ -110,4 +113,4 @@ def get_line_content( content = codecs.open(filename, encoding='utf-8').read() if not content: return None - return content.splitlines()[line_number - 1] \ No newline at end of file + return content.splitlines()[line_number - 1] diff --git a/detect_secrets/core/usage/audit.py b/detect_secrets/core/usage/audit.py index b9a7129b4..db4dccfc0 100644 --- a/detect_secrets/core/usage/audit.py +++ b/detect_secrets/core/usage/audit.py @@ -53,16 +53,16 @@ def _add_report_parser(parser: argparse.ArgumentParser) -> None: action='store_true', help=( 'Displays a report with the secrets detected' - ) + ), ) - + report_parser = parser.add_mutually_exclusive_group() report_parser.add_argument( '--only-real', action='store_true', help=( 'Only includes real secrets in the report' - ) + ), ) report_parser.add_argument( @@ -70,12 +70,10 @@ def _add_report_parser(parser: argparse.ArgumentParser) -> None: action='store_true', help=( 'Only includes false positives in the report' - ) + ), ) - - def _add_statistics_module(parent: argparse.ArgumentParser) -> None: parser = parent.add_argument_group( title='analytics', diff --git a/detect_secrets/main.py b/detect_secrets/main.py index 2845b2f2d..34c8c23fb 100644 --- a/detect_secrets/main.py +++ b/detect_secrets/main.py @@ -126,7 +126,13 @@ def handle_audit_action(args: argparse.Namespace) -> None: class_to_print = audit.report.SecretClassToPrint.REAL_SECRET elif args.only_false: class_to_print = audit.report.SecretClassToPrint.FALSE_POSITIVE - print(json.dumps(audit.report.generate_report(args.filename[0], class_to_print), indent=4, sort_keys=True)) + print( + json.dumps( + audit.report.generate_report(args.filename[0], class_to_print), + indent=4, + sort_keys=True, + ), + ) else: # Starts interactive session. if args.diff: diff --git a/tests/audit/report_test.py b/tests/audit/report_test.py new file mode 100644 index 000000000..308071565 --- /dev/null +++ b/tests/audit/report_test.py @@ -0,0 +1,122 @@ +import os +import random +import string +import tempfile + +import pytest + +from detect_secrets.audit.report import generate_report +from detect_secrets.audit.report import SecretClassToPrint +from detect_secrets.constants import VerifiedResult +from detect_secrets.core import baseline +from detect_secrets.core.secrets_collection import SecretsCollection +from detect_secrets.plugins.basic_auth import BasicAuthDetector +from detect_secrets.plugins.jwt import JwtTokenDetector +from testing.factories import potential_secret_factory as original_potential_secret_factory + + +CREATED_FILES = [] + + +@pytest.mark.parametrize( + 'class_to_print, expected_real, expected_false', + [ + (None, 2, 2), + (SecretClassToPrint.REAL_SECRET, 2, 0), + (SecretClassToPrint.FALSE_POSITIVE, 0, 3), + ], +) +def test_generate_report(class_to_print, expected_real, expected_false): + filename = baseline_file() + output = generate_report(filename, class_to_print) + real, false = count_results(output) + assert real == expected_real + assert false == expected_false + delete_all_temporal_files() + + +def count_results(data): + real_secrets = 0 + false_secrets = 0 + for secret in data: + if SecretClassToPrint.from_class(VerifiedResult[secret['category']]) == SecretClassToPrint.REAL_SECRET: # noqa: E501 + real_secrets += 1 + else: + false_secrets += 1 + return real_secrets, false_secrets + + +def baseline_file(): + # Create our own SecretsCollection manually, so that we have fine-tuned control. + url_format = 'http://username:{}@www.example.com/auth' + first_secret = 'value1' + second_secret = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ' # noqa: E501 + random_secret = ''.join(random.choice(string.ascii_letters) for _ in range(8)) + file_content = 'url = ' + url_format.format(first_secret) \ + + '\nexample = ' + url_format.format(random_secret) \ + + '\nlink = ' + url_format.format(first_secret) + first_file = create_file_with_content(file_content) + file_content = 'url = ' + url_format.format(second_secret) \ + + '\nexample = ' + url_format.format(random_secret) + second_file = create_file_with_content(file_content) + secrets = SecretsCollection() + secrets[first_file] = { + original_potential_secret_factory( + type=BasicAuthDetector.secret_type, + secret=first_secret, + is_secret=True, + line_number=1, + ), + original_potential_secret_factory( + type=BasicAuthDetector.secret_type, + secret=random_secret, + is_secret=False, + line_number=2, + ), + original_potential_secret_factory( + type=BasicAuthDetector.secret_type, + secret=first_secret, + is_secret=True, + line_number=3, + ), + } + secrets[second_file] = { + original_potential_secret_factory( + type=JwtTokenDetector.secret_type, + secret=second_secret, + is_secret=True, + line_number=1, + ), + original_potential_secret_factory( + type=BasicAuthDetector.secret_type, + secret=second_secret, + is_secret=False, + line_number=1, + ), + original_potential_secret_factory( + type=BasicAuthDetector.secret_type, + secret=random_secret, + is_secret=False, + line_number=2, + ), + } + + f = tempfile.NamedTemporaryFile(delete=False) + baseline.save_to_file(secrets, f.name) + f.seek(0) + CREATED_FILES.append(f.name) + return f.name + + +def create_file_with_content(file_content): + f = tempfile.NamedTemporaryFile(mode='w+', encoding='utf-8', delete=False) + f.write(file_content) + f.seek(0) + CREATED_FILES.append(f.name) + return f.name + + +def delete_all_temporal_files(): + for file in CREATED_FILES: + if os.path.exists(file): + os.remove(file) From 46d0adb9aff8c90f3fecefbcc7c9a411b8802d07 Mon Sep 17 00:00:00 2001 From: Pablo Santiago Date: Tue, 2 Feb 2021 09:56:59 +0100 Subject: [PATCH 04/16] Reporting feature optimization --- detect_secrets/audit/common.py | 38 +++++++++++++++++++++++++++---- detect_secrets/audit/report.py | 41 +++++----------------------------- tests/audit/report_test.py | 6 +++++ 3 files changed, 45 insertions(+), 40 deletions(-) diff --git a/detect_secrets/audit/common.py b/detect_secrets/audit/common.py index 9413dc475..abf85686e 100644 --- a/detect_secrets/audit/common.py +++ b/detect_secrets/audit/common.py @@ -41,10 +41,7 @@ def get_raw_secret_from_file(secret: PotentialSecret) -> str: :raises: SecretNotFoundOnSpecifiedLineError """ plugin = cast(BasePlugin, plugins.initialize.from_secret_type(secret.type)) - try: - target_line = open_file(secret.filename)[secret.line_number - 1] - except IndexError: - raise SecretNotFoundOnSpecifiedLineError(secret.line_number) + target_line = get_raw_secret_line_from_file(secret) function = plugin.__class__.analyze_line if not hasattr(function, 'injectable_variables'): @@ -69,6 +66,39 @@ def get_raw_secret_from_file(secret: PotentialSecret) -> str: raise SecretNotFoundOnSpecifiedLineError(secret.line_number) +def get_all_raw_secrets_from_file(secret: PotentialSecret) -> [PotentialSecret]: + plugin = cast(BasePlugin, plugins.initialize.from_secret_type(secret.type)) + lines = open_file(secret.filename) + + function = plugin.__class__.analyze_line + if not hasattr(function, 'injectable_variables'): + function.injectable_variables = set( # type: ignore + get_injectable_variables(plugin.analyze_line), + ) + function.path = f'{plugin.__class__.__name__}.analyze_line' # type: ignore + + for line_number, line in enumerate(lines): + identified_secrets = inject_variables_into_function( + cast(SelfAwareCallable, function), + self=plugin, + filename=secret.filename, + line=line, + line_number=line_number + 1, # TODO: this will be optional + enable_eager_search=True, + ) + + for identified_secret in (identified_secrets or []): + if identified_secret == secret: + yield identified_secret + + +def get_raw_secret_line_from_file(secret: PotentialSecret) -> str: + try: + return open_file(secret.filename)[secret.line_number - 1] + except IndexError: + raise SecretNotFoundOnSpecifiedLineError(secret.line_number) + + @lru_cache(maxsize=1) def open_file(filename: str) -> List[str]: """ diff --git a/detect_secrets/audit/report.py b/detect_secrets/audit/report.py index ece9bddd9..2b2d73303 100644 --- a/detect_secrets/audit/report.py +++ b/detect_secrets/audit/report.py @@ -8,7 +8,9 @@ from ..core.potential_secret import PotentialSecret from ..core.scan import _get_lines_from_file from ..core.scan import _scan_line +from .common import get_all_raw_secrets_from_file from .common import get_baseline_from_file +from .common import get_raw_secret_line_from_file class SecretClassToPrint(Enum): @@ -26,20 +28,16 @@ def generate_report( baseline_file: str, class_to_print: SecretClassToPrint = None, ) -> None: - plugins = get_mapping_from_secret_type_to_class() secrets = {} for filename, secret in get_baseline_from_file(baseline_file): verified_result = get_verified_result_from_boolean(secret.is_secret) if class_to_print is not None and SecretClassToPrint.from_class(verified_result) != class_to_print: # noqa: E501 continue - try: - detections = get_potential_secrets(filename, plugins[secret.type](), secret.secret_hash) - except Exception: - continue + detections = get_all_raw_secrets_from_file(secret) identifier = hashlib.sha512((secret.secret_hash + filename).encode('utf-8')).hexdigest() for detection in detections: if identifier in secrets: - secrets[identifier]['lines'][detection.line_number] = get_line_content(filename, detection.line_number) # noqa: E501 + secrets[identifier]['lines'][detection.line_number] = get_raw_secret_line_from_file(detection) if secret.type not in secrets[identifier]['types']: secrets[identifier]['types'].append(secret.type) secrets[identifier]['category'] = get_prioritary_verified_result( @@ -51,7 +49,7 @@ def generate_report( 'secrets': detection.secret_value, 'filename': filename, 'lines': { - detection.line_number: get_line_content(filename, detection.line_number), + detection.line_number: get_raw_secret_line_from_file(detection), }, 'types': [ secret.type, @@ -85,32 +83,3 @@ def get_verified_result_from_boolean( return VerifiedResult.VERIFIED_TRUE else: return VerifiedResult.VERIFIED_FALSE - - -def get_potential_secrets( - filename: str, - plugin: Plugin, - secret_to_find: str, -) -> [PotentialSecret]: - """ - :returns: List of PotentialSecrets detected by a specific plugin in a file. - """ - for lines in _get_lines_from_file(filename): - for line_number, line in list(enumerate(lines, 1)): - secrets = _scan_line(plugin, filename, line, line_number) - for secret in secrets: - if secret.secret_hash == secret_to_find: - yield secret - - -def get_line_content( - filename: str, - line_number: int, -) -> str: - """ - :returns: Line content from filename by line number. - """ - content = codecs.open(filename, encoding='utf-8').read() - if not content: - return None - return content.splitlines()[line_number - 1] diff --git a/tests/audit/report_test.py b/tests/audit/report_test.py index 308071565..887215d4e 100644 --- a/tests/audit/report_test.py +++ b/tests/audit/report_test.py @@ -66,18 +66,21 @@ def baseline_file(): secret=first_secret, is_secret=True, line_number=1, + filename=first_file, ), original_potential_secret_factory( type=BasicAuthDetector.secret_type, secret=random_secret, is_secret=False, line_number=2, + filename=first_file, ), original_potential_secret_factory( type=BasicAuthDetector.secret_type, secret=first_secret, is_secret=True, line_number=3, + filename=first_file, ), } secrets[second_file] = { @@ -86,18 +89,21 @@ def baseline_file(): secret=second_secret, is_secret=True, line_number=1, + filename=second_file, ), original_potential_secret_factory( type=BasicAuthDetector.secret_type, secret=second_secret, is_secret=False, line_number=1, + filename=second_file, ), original_potential_secret_factory( type=BasicAuthDetector.secret_type, secret=random_secret, is_secret=False, line_number=2, + filename=second_file, ), } From f2e2421fcd63ccd91e8b34a35c799c556a2a345c Mon Sep 17 00:00:00 2001 From: Pablo Santiago Date: Tue, 2 Feb 2021 10:01:41 +0100 Subject: [PATCH 05/16] Code correction --- detect_secrets/audit/report.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/detect_secrets/audit/report.py b/detect_secrets/audit/report.py index 2b2d73303..9b79d2d24 100644 --- a/detect_secrets/audit/report.py +++ b/detect_secrets/audit/report.py @@ -1,13 +1,7 @@ -import codecs import hashlib from enum import Enum from ..constants import VerifiedResult -from ..core.plugins.util import get_mapping_from_secret_type_to_class -from ..core.plugins.util import Plugin -from ..core.potential_secret import PotentialSecret -from ..core.scan import _get_lines_from_file -from ..core.scan import _scan_line from .common import get_all_raw_secrets_from_file from .common import get_baseline_from_file from .common import get_raw_secret_line_from_file @@ -37,7 +31,7 @@ def generate_report( identifier = hashlib.sha512((secret.secret_hash + filename).encode('utf-8')).hexdigest() for detection in detections: if identifier in secrets: - secrets[identifier]['lines'][detection.line_number] = get_raw_secret_line_from_file(detection) + secrets[identifier]['lines'][detection.line_number] = get_raw_secret_line_from_file(detection) # noqa: E501 if secret.type not in secrets[identifier]['types']: secrets[identifier]['types'].append(secret.type) secrets[identifier]['category'] = get_prioritary_verified_result( From 74614cbcd3de2514c219df2942662863710a4f89 Mon Sep 17 00:00:00 2001 From: Pablo Santiago Date: Tue, 2 Feb 2021 11:14:07 +0100 Subject: [PATCH 06/16] Reporting feature documentation --- README.md | 29 +++++++----- docs/audit.md | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index b4b071aec..12bda5c52 100644 --- a/README.md +++ b/README.md @@ -297,29 +297,34 @@ const secret = "hunter2"; ```bash $ detect-secrets audit --help -usage: detect-secrets audit [-h] [--diff] [--stats] [--json] - filename [filename ...] +usage: detect-secrets audit [-h] [--diff] [--stats] + [--report] [--only-real | --only-false] + [--json] + filename [filename ...] Auditing a baseline allows analysts to label results, and optimize plugins for the highest signal-to-noise ratio for their environment. positional arguments: - filename Audit a given baseline file to distinguish the difference - between false and true positives. + filename Audit a given baseline file to distinguish the difference + between false and true positives. optional arguments: - -h, --help show this help message and exit - --diff Allows the comparison of two baseline files, in order to - effectively distinguish the difference between various plugin - configurations. - --stats Displays the results of an interactive auditing session which - have been saved to a baseline file. + -h, --help show this help message and exit + --diff Allows the comparison of two baseline files, in order to + effectively distinguish the difference between various plugin + configurations. + --stats Displays the results of an interactive auditing session which + have been saved to a baseline file. + --report Displays a report with the secrets detected + --only-real Only includes real secrets in the report + --only-false Only includes false positives in the report analytics: - Quantify the success of your plugins based on the labelled results in your + Quantify the success of your plugins based on the labelled results in your baseline. To be used with the statisitcs mode (--stats). - --json Outputs results in a machine-readable format. + --json Outputs results in a machine-readable format. ``` ## Configuration diff --git a/docs/audit.md b/docs/audit.md index 687e5eca2..5fa34bbbc 100644 --- a/docs/audit.md +++ b/docs/audit.md @@ -140,3 +140,126 @@ There are times you want to extract the raw secret values to run further analysi so with the `--raw` flag. TODO: Example when this feature is written up. + +## Report generation + +Maybe, you need to generate a full report with all the detect-secrets findings. You can generate +one with the `--report` flag: + +'''bash +$ detect-secrets audit --report .secret.baseline +[ + { + "category": "VERIFIED_TRUE", + "filename": "test.properties", + "lines": { + "1": "secret=value", + "6": "password=value" + }, + "secrets": "value", + "types": [ + "Secret Keyword" + ] + }, + { + "category": "UNVERIFIED", + "filename": "test.properties", + "lines": { + "2": "password=changeit", + "5": "password=changeit" + }, + "secrets": "changeit", + "types": [ + "Secret Keyword" + ] + }, + { + "category": "VERIFIED_TRUE", + "filename": "test.properties", + "lines": { + "3": "password=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.", + "4": "test=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ." + }, + "secrets": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.", + "types": [ + "Secret Keyword", + "JSON Web Token" + ] + }, + { + "category": "VERIFIED_FALSE", + "filename": "test.properties", + "lines": { + "7": "password=faketest" + }, + "secrets": "faketest", + "types": [ + "Secret Keyword" + ] + } +] +''' + +You can also select only the real secrets with the option `--only-real`: + +'''bash +$ detect-secrets audit --report --only-real .secret.baseline +[ + { + "category": "VERIFIED_TRUE", + "filename": "test.properties", + "lines": { + "1": "secret=value", + "6": "password=value" + }, + "secrets": "value", + "types": [ + "Secret Keyword" + ] + }, + { + "category": "UNVERIFIED", + "filename": "test.properties", + "lines": { + "2": "password=changeit", + "5": "password=changeit" + }, + "secrets": "changeit", + "types": [ + "Secret Keyword" + ] + }, + { + "category": "VERIFIED_TRUE", + "filename": "test.properties", + "lines": { + "3": "password=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.", + "4": "test=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ." + }, + "secrets": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.", + "types": [ + "JSON Web Token", + "Secret Keyword" + ] + } +] +''' + +Or include only the false positives with `--only-false`: + +'''bash +$ detect-secrets audit --report --only-false .secret.baseline +[ + { + "category": "VERIFIED_FALSE", + "filename": "test.properties", + "lines": { + "7": "password=faketest" + }, + "secrets": "faketest", + "types": [ + "Secret Keyword" + ] + } +] +''' \ No newline at end of file From 047325785ad755dcc2c4a9ec19de4672c383be00 Mon Sep 17 00:00:00 2001 From: Pablo Santiago Date: Tue, 2 Feb 2021 12:15:24 +0100 Subject: [PATCH 07/16] Documentation corrections --- README.md | 16 ++++++++-------- docs/audit.md | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index dad4f45cf..8804f676e 100644 --- a/README.md +++ b/README.md @@ -357,31 +357,31 @@ const secret = "hunter2"; ```bash $ detect-secrets audit --help -usage: detect-secrets audit [-h] [--diff] [--stats] - [--report] [--only-real | --only-false] - [--json] +usage: detect-secrets audit [-h] [--diff] [--stats] + [--report] [--only-real | --only-false] + [--json] filename [filename ...] Auditing a baseline allows analysts to label results, and optimize plugins for the highest signal-to-noise ratio for their environment. positional arguments: - filename Audit a given baseline file to distinguish the difference + filename Audit a given baseline file to distinguish the difference between false and true positives. optional arguments: -h, --help show this help message and exit - --diff Allows the comparison of two baseline files, in order to - effectively distinguish the difference between various plugin + --diff Allows the comparison of two baseline files, in order to + effectively distinguish the difference between various plugin configurations. - --stats Displays the results of an interactive auditing session which + --stats Displays the results of an interactive auditing session which have been saved to a baseline file. --report Displays a report with the secrets detected --only-real Only includes real secrets in the report --only-false Only includes false positives in the report analytics: - Quantify the success of your plugins based on the labelled results in your + Quantify the success of your plugins based on the labelled results in your baseline. To be used with the statisitcs mode (--stats). --json Outputs results in a machine-readable format. diff --git a/docs/audit.md b/docs/audit.md index 5fa34bbbc..ae6e43645 100644 --- a/docs/audit.md +++ b/docs/audit.md @@ -262,4 +262,4 @@ $ detect-secrets audit --report --only-false .secret.baseline ] } ] -''' \ No newline at end of file +''' From 45ec64147e3e66ed662897d32da07d3dfcfe4476 Mon Sep 17 00:00:00 2001 From: Pablo Santiago Date: Tue, 2 Feb 2021 12:19:51 +0100 Subject: [PATCH 08/16] Pre-commit errors fix --- detect_secrets/audit/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/detect_secrets/audit/common.py b/detect_secrets/audit/common.py index f36ccf134..93b72485e 100644 --- a/detect_secrets/audit/common.py +++ b/detect_secrets/audit/common.py @@ -126,7 +126,7 @@ def get_all_secrets_from_file( if identified_secret == secret: all_secrets.append(identified_secret) - if len(all_secrets) == 0 and is_first_time_opening_file and not line_getter.use_eager_transformers: + if len(all_secrets) == 0 and is_first_time_opening_file and not line_getter.use_eager_transformers: # noqa: E501 line_getter.use_eager_transformers = True else: return all_secrets From 8f45d254e08c3223e6da72c05de4a7d163510d45 Mon Sep 17 00:00:00 2001 From: Pablo Santiago Date: Thu, 25 Feb 2021 14:59:41 +0100 Subject: [PATCH 09/16] Reporting feature optimization --- detect_secrets/audit/common.py | 72 ++++++++++++------------------ detect_secrets/audit/report.py | 47 +++++++------------ detect_secrets/constants.py | 11 +++++ detect_secrets/core/usage/audit.py | 10 ++++- 4 files changed, 64 insertions(+), 76 deletions(-) diff --git a/detect_secrets/audit/common.py b/detect_secrets/audit/common.py index 93b72485e..bb28d797c 100644 --- a/detect_secrets/audit/common.py +++ b/detect_secrets/audit/common.py @@ -53,64 +53,44 @@ def get_raw_secret_from_file( :raises: SecretNotFoundOnSpecifiedLineError :raises: NoLineNumberError """ - plugin = cast(BasePlugin, plugins.initialize.from_secret_type(secret.type)) - line_getter = line_getter_factory(secret.filename) - is_first_time_opening_file = not line_getter.has_cached_lines - while True: - if not secret.line_number: - raise NoLineNumberError - - try: - target_line = line_getter.lines[secret.line_number - 1] - except IndexError: - raise SecretNotFoundOnSpecifiedLineError(secret.line_number) - - identified_secrets = call_function_with_arguments( - plugin.analyze_line, - filename=secret.filename, - line=target_line, - line_number=secret.line_number, - - # We enable eager search, because we *know* there's a secret here -- the baseline - # flagged it after all. - enable_eager_search=True, - ) - - for identified_secret in (identified_secrets or []): - if identified_secret == secret: - return cast(str, identified_secret.secret_value) - - # No secret found -- maybe it's due to invalid file transformation. - # However, this only applies to the first execution of the file, since we want a - # consistent transformed file. - # - # NOTE: This is defensive coding. If we assume that this is only run on valid baselines, - # then the baseline wouldn't record secrets that were both found with and without an eager - # transformer, in the same file. - if is_first_time_opening_file and not line_getter.use_eager_transformers: - line_getter.use_eager_transformers = True - else: - break + if not secret.line_number: + raise NoLineNumberError + + for item in get_raw_secrets_from_file(secret, line_getter_factory): + return item.secret_value raise SecretNotFoundOnSpecifiedLineError(secret.line_number) -def get_all_secrets_from_file( +def get_raw_secrets_from_file( secret: PotentialSecret, line_getter_factory: Callable[[str], 'LineGetter'] = open_file, ) -> [PotentialSecret]: """ We're analyzing the contents straight from the baseline, and therefore, we don't know the secret value (by design). However, we have secret hashes, filenames, and how we detected - it was a secret in the first place, so we can reverse-engineer it. This method searchs all - the ocurrences of one secret in one file using one plugin. + it was a secret in the first place, so we can reverse-engineer it. This method searches all + the occurrences of one secret in one file using one plugin. + + :raises: SecretNotFoundOnSpecifiedLineError + :raises: NoLineNumberError """ plugin = cast(BasePlugin, plugins.initialize.from_secret_type(secret.type)) line_getter = line_getter_factory(secret.filename) is_first_time_opening_file = not line_getter.has_cached_lines all_secrets = [] while True: - for line_number, line in enumerate(line_getter.lines): + if secret.line_number: + try: + lines_to_scan = [line_getter.lines[secret.line_number - 1]] + line_numbers = [secret.line_number] + except IndexError: + raise SecretNotFoundOnSpecifiedLineError(secret.line_number) + else: + lines_to_scan = line_getter.lines + line_numbers = range(len(lines_to_scan)) + + for line_number, line in zip(line_numbers, lines_to_scan): identified_secrets = call_function_with_arguments( plugin.analyze_line, filename=secret.filename, @@ -119,14 +99,18 @@ def get_all_secrets_from_file( # We enable eager search, because we *know* there's a secret here -- the baseline # flagged it after all. - enable_eager_search=True, + enable_eager_search=bool(secret.line_number), ) for identified_secret in (identified_secrets or []): if identified_secret == secret: all_secrets.append(identified_secret) - if len(all_secrets) == 0 and is_first_time_opening_file and not line_getter.use_eager_transformers: # noqa: E501 + if ( + len(all_secrets) == 0 and + is_first_time_opening_file and + not line_getter.use_eager_transformers + ): line_getter.use_eager_transformers = True else: return all_secrets diff --git a/detect_secrets/audit/report.py b/detect_secrets/audit/report.py index af8c3eefe..25dc11917 100644 --- a/detect_secrets/audit/report.py +++ b/detect_secrets/audit/report.py @@ -1,10 +1,9 @@ -import hashlib from enum import Enum from typing import Callable from ..constants import VerifiedResult -from .common import get_all_secrets_from_file from .common import get_baseline_from_file +from .common import get_raw_secrets_from_file from .common import LineGetter from .common import open_file @@ -27,23 +26,26 @@ def generate_report( ) -> None: secrets = {} for filename, secret in get_baseline_from_file(baseline_file): - verified_result = get_verified_result_from_boolean(secret.is_secret) - if class_to_print is not None and SecretClassToPrint.from_class(verified_result) != class_to_print: # noqa: E501 + verified_result = VerifiedResult.from_secret(secret) + if ( + class_to_print is not None and + SecretClassToPrint.from_class(verified_result) != class_to_print + ): continue - detections = get_all_secrets_from_file(secret) - identifier = hashlib.sha512((secret.secret_hash + filename).encode('utf-8')).hexdigest() + secret.line_number = 0 + detections = get_raw_secrets_from_file(secret) line_getter = line_getter_factory(filename) for detection in detections: - if identifier in secrets: - secrets[identifier]['lines'][detection.line_number] = line_getter.lines[detection.line_number - 1] # noqa: E501 - if secret.type not in secrets[identifier]['types']: - secrets[identifier]['types'].append(secret.type) - secrets[identifier]['category'] = get_prioritary_verified_result( + if (secret.secret_hash, filename) in secrets: + secrets[(secret.secret_hash, filename)]['lines'][detection.line_number] = line_getter.lines[detection.line_number - 1] # noqa: E501 + if secret.type not in secrets[(secret.secret_hash, filename)]['types']: + secrets[(secret.secret_hash, filename)]['types'].append(secret.type) + secrets[(secret.secret_hash, filename)]['category'] = get_prioritized_verified_result( # noqa: E501 verified_result, - VerifiedResult[secrets[identifier]['category']], + VerifiedResult[secrets[(secret.secret_hash, filename)]['category']], ).name else: - secrets[identifier] = { + secrets[(secret.secret_hash, filename)] = { 'secrets': detection.secret_value, 'filename': filename, 'lines': { @@ -55,14 +57,10 @@ def generate_report( 'category': verified_result.name, } - output = [] - for identifier in secrets: - output.append(secrets[identifier]) + return list(secrets.values()) - return output - -def get_prioritary_verified_result( +def get_prioritized_verified_result( result1: VerifiedResult, result2: VerifiedResult, ) -> VerifiedResult: @@ -70,14 +68,3 @@ def get_prioritary_verified_result( return result1 else: return result2 - - -def get_verified_result_from_boolean( - is_secret: bool, -) -> VerifiedResult: - if is_secret is None: - return VerifiedResult.UNVERIFIED - elif is_secret: - return VerifiedResult.VERIFIED_TRUE - else: - return VerifiedResult.VERIFIED_FALSE diff --git a/detect_secrets/constants.py b/detect_secrets/constants.py index f9763eb59..7c84d1f8a 100644 --- a/detect_secrets/constants.py +++ b/detect_secrets/constants.py @@ -1,7 +1,18 @@ from enum import Enum +from .core.potential_secret import PotentialSecret + class VerifiedResult(Enum): VERIFIED_FALSE = 1 UNVERIFIED = 2 VERIFIED_TRUE = 3 + + @staticmethod + def from_secret(secret: PotentialSecret) -> 'VerifiedResult': + if secret.is_secret is None: + return VerifiedResult.UNVERIFIED + elif secret.is_secret: + return VerifiedResult.VERIFIED_TRUE + else: + return VerifiedResult.VERIFIED_FALSE diff --git a/detect_secrets/core/usage/audit.py b/detect_secrets/core/usage/audit.py index db4dccfc0..9cc784f92 100644 --- a/detect_secrets/core/usage/audit.py +++ b/detect_secrets/core/usage/audit.py @@ -21,7 +21,7 @@ def add_audit_action(parent: argparse._SubParsersAction) -> argparse.ArgumentPar ) _add_mode_parser(parser) - _add_report_parser(parser) + _add_report_module(parser) _add_statistics_module(parser) return parser @@ -47,7 +47,13 @@ def _add_mode_parser(parser: argparse.ArgumentParser) -> None: ) -def _add_report_parser(parser: argparse.ArgumentParser) -> None: +def _add_report_module(parent: argparse.ArgumentParser) -> None: + parser = parent.add_argument_group( + title='reporting', + description=( + 'Display a summary with all the findings and the made decisions' + ), + ) parser.add_argument( '--report', action='store_true', From efd9cdaa7da2062281b30bed25afe8cdc80d7a3f Mon Sep 17 00:00:00 2001 From: Pablo Santiago Date: Thu, 25 Feb 2021 20:01:36 +0100 Subject: [PATCH 10/16] Reporting test correction --- tests/audit/report_test.py | 260 ++++++++++++++++++++++++------------- 1 file changed, 173 insertions(+), 87 deletions(-) diff --git a/tests/audit/report_test.py b/tests/audit/report_test.py index 887215d4e..d544d21e1 100644 --- a/tests/audit/report_test.py +++ b/tests/audit/report_test.py @@ -1,7 +1,8 @@ -import os import random import string import tempfile +import textwrap +from contextlib import contextmanager import pytest @@ -12,27 +13,144 @@ from detect_secrets.core.secrets_collection import SecretsCollection from detect_secrets.plugins.basic_auth import BasicAuthDetector from detect_secrets.plugins.jwt import JwtTokenDetector -from testing.factories import potential_secret_factory as original_potential_secret_factory +from detect_secrets.settings import transient_settings -CREATED_FILES = [] +url_format = 'http://username:{}@www.example.com/auth' +first_secret = 'value1' +second_secret = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ' # noqa: E501 +random_secret = ''.join(random.choice(string.ascii_letters) for _ in range(8)) @pytest.mark.parametrize( - 'class_to_print, expected_real, expected_false', + 'class_to_print, expected_real, expected_false, expected_output', [ - (None, 2, 2), - (SecretClassToPrint.REAL_SECRET, 2, 0), - (SecretClassToPrint.FALSE_POSITIVE, 0, 3), + ( + None, 3, 1, [ + { + 'category': 'VERIFIED_TRUE', + 'lines': { + 1: 'url = {}'.format(url_format.format(first_secret)), + 3: 'link = {}'.format(url_format.format(first_secret)), + }, + 'secrets': first_secret, + 'types': [ + BasicAuthDetector.secret_type, + ], + }, + { + 'category': 'UNVERIFIED', + 'lines': { + 2: 'example = {}'.format(url_format.format(random_secret)), + }, + 'secrets': random_secret, + 'types': [ + BasicAuthDetector.secret_type, + ], + }, + { + 'category': 'VERIFIED_TRUE', + 'lines': { + 1: 'url = {}'.format(url_format.format(second_secret)), + }, + 'secrets': second_secret, + 'types': [ + BasicAuthDetector.secret_type, + JwtTokenDetector.secret_type, + ], + }, + { + 'category': 'VERIFIED_FALSE', + 'lines': { + 2: 'example = {}'.format(url_format.format(random_secret)), + }, + 'secrets': random_secret, + 'types': [ + BasicAuthDetector.secret_type, + ], + }, + ], + ), + ( + SecretClassToPrint.REAL_SECRET, 3, 0, [ + { + 'category': 'VERIFIED_TRUE', + 'lines': { + 1: 'url = {}'.format(url_format.format(first_secret)), + 3: 'link = {}'.format(url_format.format(first_secret)), + }, + 'secrets': first_secret, + 'types': [ + BasicAuthDetector.secret_type, + ], + }, + { + 'category': 'UNVERIFIED', + 'lines': { + 2: 'example = {}'.format(url_format.format(random_secret)), + }, + 'secrets': random_secret, + 'types': [ + BasicAuthDetector.secret_type, + ], + }, + { + 'category': 'VERIFIED_TRUE', + 'lines': { + 1: 'url = {}'.format(url_format.format(second_secret)), + }, + 'secrets': second_secret, + 'types': [ + JwtTokenDetector.secret_type, + ], + }, + ], + ), + ( + SecretClassToPrint.FALSE_POSITIVE, 0, 2, [ + { + 'category': 'VERIFIED_FALSE', + 'lines': { + 1: 'url = {}'.format(url_format.format(second_secret)), + }, + 'secrets': second_secret, + 'types': [ + BasicAuthDetector.secret_type, + ], + }, + { + 'category': 'VERIFIED_FALSE', + 'lines': { + 2: 'example = {}'.format(url_format.format(random_secret)), + }, + 'secrets': random_secret, + 'types': [ + BasicAuthDetector.secret_type, + ], + }, + ], + ), ], ) -def test_generate_report(class_to_print, expected_real, expected_false): - filename = baseline_file() - output = generate_report(filename, class_to_print) +def test_generate_report( + class_to_print, + expected_real, + expected_false, + expected_output, + baseline_file, +): + output = generate_report(baseline_file, class_to_print) real, false = count_results(output) assert real == expected_real assert false == expected_false - delete_all_temporal_files() + for expected in expected_output: + found = False + for item in output: + if expected['secrets'] == item['secrets'] and expected['category'] == item['category']: + for key in expected.keys(): + assert item[key] == expected[key] + found = True + assert found def count_results(data): @@ -46,83 +164,51 @@ def count_results(data): return real_secrets, false_secrets -def baseline_file(): - # Create our own SecretsCollection manually, so that we have fine-tuned control. - url_format = 'http://username:{}@www.example.com/auth' - first_secret = 'value1' - second_secret = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ' # noqa: E501 - random_secret = ''.join(random.choice(string.ascii_letters) for _ in range(8)) - file_content = 'url = ' + url_format.format(first_secret) \ - + '\nexample = ' + url_format.format(random_secret) \ - + '\nlink = ' + url_format.format(first_secret) - first_file = create_file_with_content(file_content) - file_content = 'url = ' + url_format.format(second_secret) \ - + '\nexample = ' + url_format.format(random_secret) - second_file = create_file_with_content(file_content) - secrets = SecretsCollection() - secrets[first_file] = { - original_potential_secret_factory( - type=BasicAuthDetector.secret_type, - secret=first_secret, - is_secret=True, - line_number=1, - filename=first_file, - ), - original_potential_secret_factory( - type=BasicAuthDetector.secret_type, - secret=random_secret, - is_secret=False, - line_number=2, - filename=first_file, - ), - original_potential_secret_factory( - type=BasicAuthDetector.secret_type, - secret=first_secret, - is_secret=True, - line_number=3, - filename=first_file, - ), - } - secrets[second_file] = { - original_potential_secret_factory( - type=JwtTokenDetector.secret_type, - secret=second_secret, - is_secret=True, - line_number=1, - filename=second_file, - ), - original_potential_secret_factory( - type=BasicAuthDetector.secret_type, - secret=second_secret, - is_secret=False, - line_number=1, - filename=second_file, - ), - original_potential_secret_factory( - type=BasicAuthDetector.secret_type, - secret=random_secret, - is_secret=False, - line_number=2, - filename=second_file, - ), - } - - f = tempfile.NamedTemporaryFile(delete=False) - baseline.save_to_file(secrets, f.name) - f.seek(0) - CREATED_FILES.append(f.name) - return f.name +@contextmanager +def create_file_with_content(content): + with tempfile.NamedTemporaryFile() as f: + f.write(content.encode()) + f.seek(0) + yield f.name -def create_file_with_content(file_content): - f = tempfile.NamedTemporaryFile(mode='w+', encoding='utf-8', delete=False) - f.write(file_content) - f.seek(0) - CREATED_FILES.append(f.name) - return f.name +@pytest.fixture +def baseline_file(): + # Create our own SecretsCollection manually, so that we have fine-tuned control. + first_content = textwrap.dedent(f""" + url = {url_format.format(first_secret)} + example = {url_format.format(random_secret)} + link = {url_format.format(first_secret)} + """)[1:] + second_content = textwrap.dedent(f""" + url = {url_format.format(second_secret)} + example = {url_format.format(random_secret)} + """)[1:] + with create_file_with_content(first_content) as first_file, \ + create_file_with_content(second_content) as second_file, \ + tempfile.NamedTemporaryFile() as baseline_file, \ + transient_settings({ + 'plugins_used': [ + {'name': 'BasicAuthDetector'}, + {'name': 'JwtTokenDetector'}, -def delete_all_temporal_files(): - for file in CREATED_FILES: - if os.path.exists(file): - os.remove(file) + ], + }): + secrets = SecretsCollection() + secrets.scan_file(first_file) + secrets.scan_file(second_file) + labels = { + (first_file, BasicAuthDetector.secret_type, 1): True, + (first_file, BasicAuthDetector.secret_type, 2): None, + (first_file, BasicAuthDetector.secret_type, 3): True, + (second_file, JwtTokenDetector.secret_type, 1): True, + (second_file, BasicAuthDetector.secret_type, 1): False, + (second_file, BasicAuthDetector.secret_type, 2): False, + } + for item in secrets: + _, secret = item + secret.is_secret = labels[(secret.filename, secret.type, secret.line_number)] + baseline.save_to_file(secrets, baseline_file.name) + baseline_file.seek(0) + yield baseline_file.name From d1430e1796deea07c656ed91c8b49bb1858ce81a Mon Sep 17 00:00:00 2001 From: Pablo Santiago Date: Thu, 25 Feb 2021 20:08:18 +0100 Subject: [PATCH 11/16] Documentation upgrade --- README.md | 4 ++++ detect_secrets/core/usage/audit.py | 18 ++++++++++-------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 8804f676e..9d2aa0356 100644 --- a/README.md +++ b/README.md @@ -377,6 +377,10 @@ optional arguments: --stats Displays the results of an interactive auditing session which have been saved to a baseline file. --report Displays a report with the secrets detected + +reporting: + Display a summary with all the findings and the made decisions. To be used with the report mode (--report). + --only-real Only includes real secrets in the report --only-false Only includes false positives in the report diff --git a/detect_secrets/core/usage/audit.py b/detect_secrets/core/usage/audit.py index 9cc784f92..7d864b9ef 100644 --- a/detect_secrets/core/usage/audit.py +++ b/detect_secrets/core/usage/audit.py @@ -46,14 +46,6 @@ def _add_mode_parser(parser: argparse.ArgumentParser) -> None: ), ) - -def _add_report_module(parent: argparse.ArgumentParser) -> None: - parser = parent.add_argument_group( - title='reporting', - description=( - 'Display a summary with all the findings and the made decisions' - ), - ) parser.add_argument( '--report', action='store_true', @@ -62,6 +54,16 @@ def _add_report_module(parent: argparse.ArgumentParser) -> None: ), ) + +def _add_report_module(parent: argparse.ArgumentParser) -> None: + parser = parent.add_argument_group( + title='reporting', + description=( + 'Display a report with all the findings and the made decisions. ' + 'To be used with the report mode (--report).' + ), + ) + report_parser = parser.add_mutually_exclusive_group() report_parser.add_argument( '--only-real', From c4e4a2c431601cc8d828225b09e953bbdfbaab45 Mon Sep 17 00:00:00 2001 From: Pablo Santiago Date: Fri, 26 Feb 2021 18:00:31 +0100 Subject: [PATCH 12/16] Corrections --- detect_secrets/audit/report.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/detect_secrets/audit/report.py b/detect_secrets/audit/report.py index 25dc11917..9a82df90d 100644 --- a/detect_secrets/audit/report.py +++ b/detect_secrets/audit/report.py @@ -12,7 +12,7 @@ class SecretClassToPrint(Enum): REAL_SECRET = 1 FALSE_POSITIVE = 2 - def from_class(secret_class: VerifiedResult) -> Enum: + def from_class(secret_class: VerifiedResult) -> 'SecretClassToPrint': if secret_class in [VerifiedResult.UNVERIFIED, VerifiedResult.VERIFIED_TRUE]: return SecretClassToPrint.REAL_SECRET else: @@ -32,6 +32,7 @@ def generate_report( SecretClassToPrint.from_class(verified_result) != class_to_print ): continue + # Removal of the stored line number is required to force the complete file scanning to obtain all the secret occurrences. secret.line_number = 0 detections = get_raw_secrets_from_file(secret) line_getter = line_getter_factory(filename) From fcbee98e3f9b8a14962d293b096bb25d1de7547d Mon Sep 17 00:00:00 2001 From: Pablo Santiago Date: Fri, 26 Feb 2021 18:25:00 +0100 Subject: [PATCH 13/16] Corrections --- detect_secrets/audit/common.py | 2 +- detect_secrets/audit/report.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/detect_secrets/audit/common.py b/detect_secrets/audit/common.py index bb28d797c..977854b99 100644 --- a/detect_secrets/audit/common.py +++ b/detect_secrets/audit/common.py @@ -83,7 +83,7 @@ def get_raw_secrets_from_file( if secret.line_number: try: lines_to_scan = [line_getter.lines[secret.line_number - 1]] - line_numbers = [secret.line_number] + line_numbers = [secret.line_number - 1] except IndexError: raise SecretNotFoundOnSpecifiedLineError(secret.line_number) else: diff --git a/detect_secrets/audit/report.py b/detect_secrets/audit/report.py index 9a82df90d..164aee80b 100644 --- a/detect_secrets/audit/report.py +++ b/detect_secrets/audit/report.py @@ -32,7 +32,7 @@ def generate_report( SecretClassToPrint.from_class(verified_result) != class_to_print ): continue - # Removal of the stored line number is required to force the complete file scanning to obtain all the secret occurrences. + # Removal of the stored line number is required to force the complete file scanning to obtain all the secret occurrences. # noqa: E501 secret.line_number = 0 detections = get_raw_secrets_from_file(secret) line_getter = line_getter_factory(filename) From b4e9cc4a2ce6bc14b5b495bc318b9e6b3998bba7 Mon Sep 17 00:00:00 2001 From: Julian Fonticoba Mouriz Date: Tue, 13 Apr 2021 16:09:44 +0200 Subject: [PATCH 14/16] Correct mypy issues --- detect_secrets/audit/common.py | 7 ++++--- detect_secrets/audit/report.py | 10 ++++++++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/detect_secrets/audit/common.py b/detect_secrets/audit/common.py index 977854b99..0ab6149b8 100644 --- a/detect_secrets/audit/common.py +++ b/detect_secrets/audit/common.py @@ -6,6 +6,7 @@ from typing import Iterator from typing import List from typing import Optional +from typing import Any from . import io from ..core import baseline @@ -44,7 +45,7 @@ def open_file(filename: str) -> 'LineGetter': def get_raw_secret_from_file( secret: PotentialSecret, line_getter_factory: Callable[[str], 'LineGetter'] = open_file, -) -> str: +) -> Any: """ We're analyzing the contents straight from the baseline, and therefore, we don't know the secret value (by design). However, we have line numbers, filenames, and how we detected @@ -65,7 +66,7 @@ def get_raw_secret_from_file( def get_raw_secrets_from_file( secret: PotentialSecret, line_getter_factory: Callable[[str], 'LineGetter'] = open_file, -) -> [PotentialSecret]: +) -> List[PotentialSecret]: """ We're analyzing the contents straight from the baseline, and therefore, we don't know the secret value (by design). However, we have secret hashes, filenames, and how we detected @@ -88,7 +89,7 @@ def get_raw_secrets_from_file( raise SecretNotFoundOnSpecifiedLineError(secret.line_number) else: lines_to_scan = line_getter.lines - line_numbers = range(len(lines_to_scan)) + line_numbers = list(range(len(lines_to_scan))) for line_number, line in zip(line_numbers, lines_to_scan): identified_secrets = call_function_with_arguments( diff --git a/detect_secrets/audit/report.py b/detect_secrets/audit/report.py index 164aee80b..0eea28bf5 100644 --- a/detect_secrets/audit/report.py +++ b/detect_secrets/audit/report.py @@ -1,5 +1,9 @@ from enum import Enum from typing import Callable +from typing import Any +from typing import List +from typing import Dict +from typing import Tuple from ..constants import VerifiedResult from .common import get_baseline_from_file @@ -12,6 +16,7 @@ class SecretClassToPrint(Enum): REAL_SECRET = 1 FALSE_POSITIVE = 2 + @staticmethod def from_class(secret_class: VerifiedResult) -> 'SecretClassToPrint': if secret_class in [VerifiedResult.UNVERIFIED, VerifiedResult.VERIFIED_TRUE]: return SecretClassToPrint.REAL_SECRET @@ -23,8 +28,9 @@ def generate_report( baseline_file: str, class_to_print: SecretClassToPrint = None, line_getter_factory: Callable[[str], 'LineGetter'] = open_file, -) -> None: - secrets = {} +) -> List[Dict[str, Any]]: + + secrets: Dict[Tuple[str, str], Any] = {} for filename, secret in get_baseline_from_file(baseline_file): verified_result = VerifiedResult.from_secret(secret) if ( From 14c964f8f72fd7b6fa44642b0f83aaa6814b17ba Mon Sep 17 00:00:00 2001 From: Julian Fonticoba Mouriz Date: Tue, 13 Apr 2021 16:39:48 +0200 Subject: [PATCH 15/16] Reorder imports by precommit --- detect_secrets/audit/common.py | 2 +- detect_secrets/audit/report.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/detect_secrets/audit/common.py b/detect_secrets/audit/common.py index 0ab6149b8..8a2c434ff 100644 --- a/detect_secrets/audit/common.py +++ b/detect_secrets/audit/common.py @@ -1,12 +1,12 @@ import json from contextlib import contextmanager from functools import lru_cache +from typing import Any from typing import Callable from typing import cast from typing import Iterator from typing import List from typing import Optional -from typing import Any from . import io from ..core import baseline diff --git a/detect_secrets/audit/report.py b/detect_secrets/audit/report.py index 0eea28bf5..ce1536b04 100644 --- a/detect_secrets/audit/report.py +++ b/detect_secrets/audit/report.py @@ -1,8 +1,8 @@ from enum import Enum -from typing import Callable from typing import Any -from typing import List +from typing import Callable from typing import Dict +from typing import List from typing import Tuple from ..constants import VerifiedResult From 4001e8e110bdc80c11e444cc9df40bbe3ad63b5a Mon Sep 17 00:00:00 2001 From: Julian Fonticoba Mouriz Date: Tue, 13 Apr 2021 17:12:57 +0200 Subject: [PATCH 16/16] Improve mypy issue resolution --- detect_secrets/audit/common.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/detect_secrets/audit/common.py b/detect_secrets/audit/common.py index 8a2c434ff..7359fc419 100644 --- a/detect_secrets/audit/common.py +++ b/detect_secrets/audit/common.py @@ -1,7 +1,6 @@ import json from contextlib import contextmanager from functools import lru_cache -from typing import Any from typing import Callable from typing import cast from typing import Iterator @@ -45,7 +44,7 @@ def open_file(filename: str) -> 'LineGetter': def get_raw_secret_from_file( secret: PotentialSecret, line_getter_factory: Callable[[str], 'LineGetter'] = open_file, -) -> Any: +) -> Optional[str]: """ We're analyzing the contents straight from the baseline, and therefore, we don't know the secret value (by design). However, we have line numbers, filenames, and how we detected