Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reporting feature #387

Merged
merged 20 commits into from
Apr 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 20 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -407,29 +407,38 @@ const secret = "hunter2";

```bash
$ detect-secrets audit --help
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The audit help output has been updated to include the new report generation options.

usage: detect-secrets audit [-h] [--diff] [--stats] [--json]
filename [filename ...]
usage: detect-secrets audit [-h] [--diff] [--stats]
[--report] [--only-real | --only-false]
[--json]
filename [filename ...]

Auditing a baseline allows analysts to label results, and optimize plugins for
the highest signal-to-noise ratio for their environment.

positional arguments:
filename Audit a given baseline file to distinguish the difference
between false and true positives.
filename Audit a given baseline file to distinguish the difference
between false and true positives.

optional arguments:
-h, --help show this help message and exit
--diff Allows the comparison of two baseline files, in order to
effectively distinguish the difference between various plugin
configurations.
--stats Displays the results of an interactive auditing session which
have been saved to a baseline file.
-h, --help show this help message and exit
--diff Allows the comparison of two baseline files, in order to
effectively distinguish the difference between various plugin
configurations.
--stats Displays the results of an interactive auditing session which
have been saved to a baseline file.
--report Displays a report with the secrets detected

reporting:
Display a report with all the findings and the made decisions. To be used with the report mode (--report).

--only-real Only includes real secrets in the report
--only-false Only includes false positives in the report

analytics:
Quantify the success of your plugins based on the labelled results in your
baseline. To be used with the statisitcs mode (--stats).

--json Outputs results in a machine-readable format.
--json Outputs results in a machine-readable format.
```

## Configuration
Expand Down
1 change: 1 addition & 0 deletions detect_secrets/audit/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from . import analytics # noqa: F401
from . import report # noqa: F401
from .audit import audit_baseline # noqa: F401
from .compare import compare_baselines # noqa: F401
91 changes: 56 additions & 35 deletions detect_secrets/audit/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,55 +44,76 @@ def open_file(filename: str) -> 'LineGetter':
def get_raw_secret_from_file(
secret: PotentialSecret,
line_getter_factory: Callable[[str], 'LineGetter'] = open_file,
) -> str:
) -> Optional[str]:
"""
We're analyzing the contents straight from the baseline, and therefore, we don't know
the secret value (by design). However, we have line numbers, filenames, and how we detected
it was a secret in the first place, so we can reverse-engineer it.

:raises: SecretNotFoundOnSpecifiedLineError
:raises: NoLineNumberError
"""
if not secret.line_number:
raise NoLineNumberError

for item in get_raw_secrets_from_file(secret, line_getter_factory):
return item.secret_value

raise SecretNotFoundOnSpecifiedLineError(secret.line_number)


def get_raw_secrets_from_file(
secret: PotentialSecret,
line_getter_factory: Callable[[str], 'LineGetter'] = open_file,
) -> List[PotentialSecret]:
"""
We're analyzing the contents straight from the baseline, and therefore, we don't know
the secret value (by design). However, we have secret hashes, filenames, and how we detected
it was a secret in the first place, so we can reverse-engineer it. This method searches all
the occurrences of one secret in one file using one plugin.

:raises: SecretNotFoundOnSpecifiedLineError
:raises: NoLineNumberError
"""
plugin = cast(BasePlugin, plugins.initialize.from_secret_type(secret.type))
line_getter = line_getter_factory(secret.filename)
is_first_time_opening_file = not line_getter.has_cached_lines
all_secrets = []
while True:
if not secret.line_number:
raise NoLineNumberError

try:
target_line = line_getter.lines[secret.line_number - 1]
except IndexError:
raise SecretNotFoundOnSpecifiedLineError(secret.line_number)

identified_secrets = call_function_with_arguments(
plugin.analyze_line,
filename=secret.filename,
line=target_line,
line_number=secret.line_number,

# We enable eager search, because we *know* there's a secret here -- the baseline
# flagged it after all.
enable_eager_search=True,
)

for identified_secret in (identified_secrets or []):
if identified_secret == secret:
return cast(str, identified_secret.secret_value)

# No secret found -- maybe it's due to invalid file transformation.
# However, this only applies to the first execution of the file, since we want a
# consistent transformed file.
#
# NOTE: This is defensive coding. If we assume that this is only run on valid baselines,
# then the baseline wouldn't record secrets that were both found with and without an eager
# transformer, in the same file.
if is_first_time_opening_file and not line_getter.use_eager_transformers:
if secret.line_number:
try:
lines_to_scan = [line_getter.lines[secret.line_number - 1]]
line_numbers = [secret.line_number - 1]
except IndexError:
raise SecretNotFoundOnSpecifiedLineError(secret.line_number)
else:
lines_to_scan = line_getter.lines
line_numbers = list(range(len(lines_to_scan)))

for line_number, line in zip(line_numbers, lines_to_scan):
identified_secrets = call_function_with_arguments(
plugin.analyze_line,
filename=secret.filename,
line=line,
line_number=line_number + 1,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if secret.line_number:
             try:
                 lines_to_scan = [line_getter.lines[secret.line_number - 1]]
                 line_numbers = [secret.line_number]

I think that this code is correct, note that in the line 98 the line_number value is increased in one.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, but secret.line_number is already 1-based. As such, when we obtain the target_line, we subtract it by 1 (to obtain from the line_getter.lines array).

So, if we add one to it again, I think we'll get an off-by-one error.

Copy link
Contributor Author

@pablosnt pablosnt Feb 26, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I get it. I have runned the tests and I think that it's fine. Thank you!


# We enable eager search, because we *know* there's a secret here -- the baseline
# flagged it after all.
enable_eager_search=bool(secret.line_number),
)

for identified_secret in (identified_secrets or []):
if identified_secret == secret:
all_secrets.append(identified_secret)

if (
len(all_secrets) == 0 and
is_first_time_opening_file and
not line_getter.use_eager_transformers
):
line_getter.use_eager_transformers = True
else:
break

raise SecretNotFoundOnSpecifiedLineError(secret.line_number)
return all_secrets


class LineGetter:
Expand Down
77 changes: 77 additions & 0 deletions detect_secrets/audit/report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from enum import Enum
from typing import Any
from typing import Callable
from typing import Dict
from typing import List
from typing import Tuple

from ..constants import VerifiedResult
from .common import get_baseline_from_file
from .common import get_raw_secrets_from_file
from .common import LineGetter
from .common import open_file


class SecretClassToPrint(Enum):
    """
    Filter selecting which class of audited secrets a report should contain.
    """
    REAL_SECRET = 1
    FALSE_POSITIVE = 2

    @staticmethod
    def from_class(secret_class: VerifiedResult) -> 'SecretClassToPrint':
        """
        Map an audit label onto the report class it belongs to.

        Unverified secrets are grouped together with verified true secrets;
        every other label is reported as a false positive.
        """
        counted_as_real = (VerifiedResult.UNVERIFIED, VerifiedResult.VERIFIED_TRUE)
        if secret_class not in counted_as_real:
            return SecretClassToPrint.FALSE_POSITIVE

        return SecretClassToPrint.REAL_SECRET


def generate_report(
    baseline_file: str,
    class_to_print: SecretClassToPrint = None,
    line_getter_factory: Callable[[str], 'LineGetter'] = open_file,
) -> List[Dict[str, Any]]:
    """
    Summarize every occurrence of the secrets recorded in a baseline file.

    Occurrences are grouped by (secret hash, filename): each group collects the
    matching lines, the secret types that flagged it, and a single audit category.

    :param baseline_file: baseline to read, as accepted by `get_baseline_from_file`.
    :param class_to_print: when not None, only baseline entries whose audit label maps
        to this class (see `SecretClassToPrint.from_class`) are reported.
    :param line_getter_factory: builds the line getter for a filename. It is used both
        to locate the occurrences and to read the text of the reported lines.
    :returns: one dictionary per (secret hash, filename) pair, with the keys `secrets`,
        `filename`, `lines` (line number -> line content), `types` and `category`
        (the name of a `VerifiedResult` member).
    """
    secrets: Dict[Tuple[str, str], Any] = {}
    for filename, secret in get_baseline_from_file(baseline_file):
        verified_result = VerifiedResult.from_secret(secret)
        if (
            class_to_print is not None and
            SecretClassToPrint.from_class(verified_result) != class_to_print
        ):
            continue

        # Removal of the stored line number is required to force the complete file
        # scanning to obtain all the secret occurrences.
        secret.line_number = 0

        # The same factory must serve both the detection and the line lookup below;
        # otherwise an injected factory would be bypassed while locating the secrets
        # and the reported line numbers could point into a different set of lines.
        detections = get_raw_secrets_from_file(secret, line_getter_factory)

        # The line getter manages all the lines of the file being reported.
        line_getter = line_getter_factory(filename)

        key = (secret.secret_hash, filename)
        for detection in detections:
            line_content = line_getter.lines[detection.line_number - 1]
            entry = secrets.get(key)
            if entry is None:
                secrets[key] = {
                    'secrets': detection.secret_value,
                    'filename': filename,
                    'lines': {
                        detection.line_number: line_content,
                    },
                    'types': [
                        secret.type,
                    ],
                    'category': verified_result.name,
                }
                continue

            entry['lines'][detection.line_number] = line_content
            if secret.type not in entry['types']:
                entry['types'].append(secret.type)

            # When several baseline entries share the same secret, keep the label
            # with the highest priority.
            entry['category'] = get_prioritized_verified_result(
                verified_result,
                VerifiedResult[entry['category']],
            ).name

    return list(secrets.values())


def get_prioritized_verified_result(
    result1: VerifiedResult,
    result2: VerifiedResult,
) -> VerifiedResult:
    """
    Pick the label with the greater enum value out of the two given.

    :returns: `result1` only when its value is strictly greater; otherwise `result2`.
    """
    return result1 if result1.value > result2.value else result2
11 changes: 11 additions & 0 deletions detect_secrets/constants.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,18 @@
from enum import Enum

from .core.potential_secret import PotentialSecret


class VerifiedResult(Enum):
    """
    Audit label attached to a secret.
    """
    VERIFIED_FALSE = 1
    UNVERIFIED = 2
    VERIFIED_TRUE = 3

    @staticmethod
    def from_secret(secret: PotentialSecret) -> 'VerifiedResult':
        """
        Derive the label from the secret's `is_secret` attribute.

        `None` means the secret has not been labelled; any other value is
        interpreted by its truthiness.
        """
        label = secret.is_secret
        if label is None:
            return VerifiedResult.UNVERIFIED

        return VerifiedResult.VERIFIED_TRUE if label else VerifiedResult.VERIFIED_FALSE
36 changes: 36 additions & 0 deletions detect_secrets/core/usage/audit.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def add_audit_action(parent: argparse._SubParsersAction) -> argparse.ArgumentPar
)

_add_mode_parser(parser)
_add_report_module(parser)
_add_statistics_module(parser)
return parser

Expand All @@ -45,6 +46,41 @@ def _add_mode_parser(parser: argparse.ArgumentParser) -> None:
),
)

parser.add_argument(
'--report',
action='store_true',
help=(
'Displays a report with the secrets detected'
),
)


def _add_report_module(parent: argparse.ArgumentParser) -> None:
parser = parent.add_argument_group(
title='reporting',
description=(
'Display a report with all the findings and the made decisions. '
'To be used with the report mode (--report).'
),
)

report_parser = parser.add_mutually_exclusive_group()
report_parser.add_argument(
'--only-real',
action='store_true',
help=(
'Only includes real secrets in the report'
),
)

report_parser.add_argument(
'--only-false',
action='store_true',
help=(
'Only includes false positives in the report'
),
)
pablosnt marked this conversation as resolved.
Show resolved Hide resolved


def _add_statistics_module(parent: argparse.ArgumentParser) -> None:
parser = parent.add_argument_group(
Expand Down
13 changes: 13 additions & 0 deletions detect_secrets/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,19 @@ def handle_audit_action(args: argparse.Namespace) -> None:
print(json.dumps(stats.json(), indent=2))
else:
print(str(stats))
elif args.report:
class_to_print = None
if args.only_real:
class_to_print = audit.report.SecretClassToPrint.REAL_SECRET
elif args.only_false:
class_to_print = audit.report.SecretClassToPrint.FALSE_POSITIVE
print(
json.dumps(
audit.report.generate_report(args.filename[0], class_to_print),
indent=4,
sort_keys=True,
),
)
else:
# Starts interactive session.
if args.diff:
Expand Down
Loading