diff --git a/README.md b/README.md index 26758ae94..785731efa 100644 --- a/README.md +++ b/README.md @@ -162,6 +162,8 @@ The current heuristic searches we implement out of the box include: * **RegexBasedDetector**: checks for any keys matching certain regular expressions (Artifactory, AWS, Slack, Stripe, Mailchimp). +* **JwtTokenDetector**: checks for formally correct JWTs. + See [detect_secrets/ plugins](https://github.com/Yelp/detect-secrets/tree/master/detect_secrets/plugins) for more details. diff --git a/detect_secrets/core/usage.py b/detect_secrets/core/usage.py index ad6e3116d..c66b2cf0d 100644 --- a/detect_secrets/core/usage.py +++ b/detect_secrets/core/usage.py @@ -335,6 +335,11 @@ class PluginOptions(object): disable_flag_text='--no-mailchimp-scan', disable_help_text='Disable scanning for Mailchimp keys', ), + PluginDescriptor( + classname='JwtTokenDetector', + disable_flag_text='--no-jwt-scan', + disable_help_text='Disable scanning for JWTs', + ), ] def __init__(self, parser): diff --git a/detect_secrets/plugins/common/initialize.py b/detect_secrets/plugins/common/initialize.py index 45998179f..d051c8ee3 100644 --- a/detect_secrets/plugins/common/initialize.py +++ b/detect_secrets/plugins/common/initialize.py @@ -6,6 +6,7 @@ from ..common.util import get_mapping_from_secret_type_to_class_name from ..high_entropy_strings import Base64HighEntropyString # noqa: F401 from ..high_entropy_strings import HexHighEntropyString # noqa: F401 +from ..jwt import JwtTokenDetector # noqa: F401 from ..keyword import KeywordDetector # noqa: F401 from ..mailchimp import MailchimpDetector # noqa: F401 from ..private_key import PrivateKeyDetector # noqa: F401 diff --git a/detect_secrets/plugins/common/util.py b/detect_secrets/plugins/common/util.py index 6bef437f4..d604aa731 100644 --- a/detect_secrets/plugins/common/util.py +++ b/detect_secrets/plugins/common/util.py @@ -11,6 +11,7 @@ from ..basic_auth import BasicAuthDetector # noqa: F401 from ..high_entropy_strings import
class JwtTokenDetector(RegexBasedDetector):
    """Flag strings that are formally correct JSON Web Tokens.

    A candidate must first match the regex in ``denylist`` and then pass
    ``is_formally_valid``: every dot-separated segment has to be decodable
    base64url, and the first two segments (header and claims) additionally
    have to decode to UTF-8 encoded JSON.
    """

    secret_type = 'JSON Web Token'
    denylist = [
        # A base64url-encoded JSON object that begins with `{"` followed by
        # an ASCII letter starts with 'eyJ', hence the fixed prefix.
        # NOTE(review): the trailing `*?` is lazy and sits at the very end of
        # the pattern, so it can match the empty string; the signature
        # segment may therefore not be part of the reported match. Confirm
        # this is intended before changing it, since it affects the value
        # that gets reported.
        re.compile(r'eyJ[A-Za-z0-9-_=]+\.[A-Za-z0-9-_=]+\.?[A-Za-z0-9-_.+/=]*?'),
    ]

    def secret_generator(self, string, *args, **kwargs):
        """Yield only the parent's candidates that are formally valid JWTs.

        :param string: text to scan; forwarded unchanged, together with any
            extra positional/keyword arguments, to the parent class.
        :returns: a lazy iterator over the accepted candidate strings.
        """
        # A generator expression stays lazy on both Python 2 and Python 3,
        # so this does not rely on which `filter` builtin is in scope.
        candidates = super(JwtTokenDetector, self).secret_generator(
            string, *args, **kwargs
        )
        return (
            token
            for token in candidates
            if self.is_formally_valid(token)
        )

    @staticmethod
    def is_formally_valid(token):
        """Return True if ``token`` is structurally a JWT.

        The token is split on ``.``. It needs at least a header and a claims
        segment; each segment must be ASCII and valid (unpadded or padded)
        base64url, and the first two segments must contain UTF-8 JSON.
        The signature is only checked for decodability, never verified.

        :param token: candidate text string.
        :returns: bool
        """
        parts = token.split('.')
        if len(parts) < 2:
            # A lone header without a claims segment is not a JWT.
            return False

        for idx, part in enumerate(parts):
            try:
                raw = part.encode('ascii')
                # JWT segments are base64url with the trailing '=' stripped.
                # A length of 4k+1 can never be produced by base64.
                if len(raw) % 4 == 1:
                    return False
                # Restore exactly the padding that was stripped:
                # 4k+2 -> '==', 4k+3 -> '=', 4k -> nothing.
                raw += b'=' * (-len(raw) % 4)
                decoded = base64.urlsafe_b64decode(raw)
                if idx < 2:
                    # Header and claims must be JSON documents; the parsed
                    # value itself is not needed.
                    json.loads(decoded.decode('utf-8'))
            except (TypeError, ValueError):
                # TypeError: Python 2 base64 errors.
                # ValueError: binascii.Error, JSON errors and the
                # UnicodeEncodeError/UnicodeDecodeError subclasses.
                return False

        return True
class TestJwtTokenDetector(object):
    """Checks JwtTokenDetector.analyze_string against JWT-like payloads."""

    @pytest.mark.parametrize(
        'payload, should_flag',
        [
            # Well-formed token: header, claims and signature.
            (
                'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c',  # noqa: E501
                True,
            ),
            # Still valid: the JSON header is broken across lines with CR/LF.
            (
                'eyJ0eXAiOiJKV1QiLA0KImFsZyI6IkhTMjU2In0.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ',  # noqa: E501
                True,
            ),
            # Still valid: the claims JSON contains LF newlines.
            (
                'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJuYW1lIjoiSm9lIiwKInN0YXR1cyI6ImVtcGxveWVlIgp9',  # noqa: E501
                True,
            ),
            # Still valid: the claims hold strings with accented characters.
            (
                'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IsWww6HFkcOtIMOWxZHDqcOoIiwiaWF0IjoxNTE2MjM5MDIyfQ.k5HibI_uLn_RTuPcaCNkaVaQH2y5q6GvJg8GPpGMRwQ',  # noqa: E501
                True,
            ),
            # Same token as the first case, passed as a unicode literal.
            (
                u'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c',  # noqa: E501
                True,
            ),
            # Header and claims only; a missing signature is accepted.
            (
                'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ',  # noqa: E501
                True,
            ),
            # Header and claims left as plain JSON instead of base64url.
            (
                '{"alg":"HS256","typ":"JWT"}.{"name":"Jon Doe"}.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c',  # noqa: E501
                False,
            ),
            # Segments are base64 of non-JSON text; the regex rejects it.
            (
                'bm90X3ZhbGlkX2pzb25fYXRfYWxs.bm90X3ZhbGlkX2pzb25fYXRfYWxs.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c',  # noqa: E501
                False,
            ),
            # A header on its own, with no claims segment.
            (
                'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9',  # noqa: E501
                False,
            ),
            # Nothing JWT-like at all.
            (
                'jwt',  # noqa: E501
                False,
            ),
            # Header polluted with random characters, so it is not JSON.
            (
                'eyJhbasdGciOiJIUaddasdasfsasdasdzI1NiIasdsInR5cCI6IkpXVCasdJasd9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c',  # noqa: E501
                False,
            ),
            # Header JSON is truncated; rejected while parsing, not by regex.
            (
                'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c',  # noqa: E501
                False,
            ),
            # Both match the regex but are otherwise not JWTs.
            (
                'eyJAAAA.eyJBBB',  # noqa: E501
                False,
            ),
            (
                'eyJBB.eyJCC.eyJDDDD',  # noqa: E501
                False,
            ),
        ],
    )
    def test_analyze_string(self, payload, should_flag):
        """A flagged payload yields exactly one result, otherwise none."""
        detector = JwtTokenDetector()
        expected_count = int(should_flag)

        findings = detector.analyze_string(payload, 1, 'mock_filename')

        assert len(findings) == expected_count