Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JWT Detector Plugin #239

Merged
merged 3 commits into from
Sep 17, 2019
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,8 @@ The current heuristic searches we implement out of the box include:

* **RegexBasedDetector**: checks for any keys matching certain regular expressions (Artifactory, AWS, Slack, Stripe, Mailchimp).

* **JwtTokenDetector**: checks for formally correct JWTs.

See [detect_secrets/
plugins](https://github.com/Yelp/detect-secrets/tree/master/detect_secrets/plugins)
for more details.
Expand Down
5 changes: 5 additions & 0 deletions detect_secrets/core/usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,11 @@ class PluginOptions(object):
disable_flag_text='--no-mailchimp-scan',
disable_help_text='Disable scanning for Mailchimp keys',
),
PluginDescriptor(
classname='JwtTokenDetector',
disable_flag_text='--no-jwt-scan',
disable_help_text='Disable scanning for JWTs',
),
]

def __init__(self, parser):
Expand Down
1 change: 1 addition & 0 deletions detect_secrets/plugins/common/initialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from ..common.util import get_mapping_from_secret_type_to_class_name
from ..high_entropy_strings import Base64HighEntropyString # noqa: F401
from ..high_entropy_strings import HexHighEntropyString # noqa: F401
from ..jwt import JwtTokenDetector # noqa: F401
from ..keyword import KeywordDetector # noqa: F401
from ..mailchimp import MailchimpDetector # noqa: F401
from ..private_key import PrivateKeyDetector # noqa: F401
Expand Down
1 change: 1 addition & 0 deletions detect_secrets/plugins/common/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from ..basic_auth import BasicAuthDetector # noqa: F401
from ..high_entropy_strings import Base64HighEntropyString # noqa: F401
from ..high_entropy_strings import HexHighEntropyString # noqa: F401
from ..jwt import JwtTokenDetector # noqa: F401
from ..keyword import KeywordDetector # noqa: F401
from ..private_key import PrivateKeyDetector # noqa: F401
from ..slack import SlackDetector # noqa: F401
Expand Down
53 changes: 53 additions & 0 deletions detect_secrets/plugins/jwt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
"""
This plugin finds JSON Web Tokens (JWTs)
"""
from __future__ import absolute_import

import base64
import json
import re

from .base import RegexBasedDetector

try:
# Python 2
from future_builtins import filter
except ImportError:
# Python 3
pass


class JwtTokenDetector(RegexBasedDetector):
secret_type = 'JSON Web Token'
denylist = [
re.compile(r'eyJ[A-Za-z0-9-_=]+\.[A-Za-z0-9-_=]+\.?[A-Za-z0-9-_.+/=]*'),
Copy link
Collaborator

@KevinHock KevinHock Sep 16, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a ? after the *? I believe this will make it non-greedy i.e. more efficient.

In other words

re.compile(r'eyJ[A-Za-z0-9-_=]+\.[A-Za-z0-9-_=]+\.?[A-Za-z0-9-_.+/=]*?'),

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@KevinHock Absolutely, and thanks for pointing it out.

]

def secret_generator(self, string, *args, **kwargs):
    """Return the parent detector's candidates that pass the JWT check.

    All arguments are forwarded unchanged to the parent class's
    ``secret_generator``; each candidate it produces is kept only when
    ``is_formally_valid`` accepts it.
    """
    candidates = super(JwtTokenDetector, self).secret_generator(
        string,
        *args,
        **kwargs
    )
    return filter(self.is_formally_valid, candidates)

@staticmethod
def is_formally_valid(token):
    """Check whether ``token`` is structured like a JSON Web Token.

    The token is split on ``.``. Every segment must be decodable as
    unpadded base64url, and the first two segments (header and claims)
    must additionally decode to UTF-8 encoded JSON. Any further segment
    (e.g. a signature) only has to be decodable base64url.

    :type token: str
    :param token: candidate string to validate.

    :rtype: bool
    :returns: True if the token is formally valid, False otherwise.
    """
    parts = token.split('.')
    # A JWT carries at least a header and a claims segment.
    if len(parts) < 2:
        return False

    for idx, part in enumerate(parts):
        try:
            part = part.encode('ascii')
            # JWT segments are base64url with the trailing '=' stripped, so
            # the padding has to be restored before decoding. Per
            # https://tools.ietf.org/id/draft-jones-json-web-token-04.html
            # (Appendix B):
            #   length mod 4 == 0 -> no padding
            #   length mod 4 == 2 -> '=='
            #   length mod 4 == 3 -> '='
            #   length mod 4 == 1 -> malformed input
            # See also:
            # https://github.com/magical/jwt-python/blob/2fd976b41111031313107792b40d5cfd1a8baf90/jwt.py#L49
            # https://github.com/jpadilla/pyjwt/blob/3d47b0ea9e5d489f9c90ee6dde9e3d9d69244e3a/jwt/utils.py#L33
            remainder = len(part) % 4
            if remainder == 1:
                return False
            part += b'=' * ((4 - remainder) % 4)
            b64_decoded = base64.urlsafe_b64decode(part)
            if idx < 2:
                # Only header and claims are required to be JSON.
                json.loads(b64_decoded.decode('utf-8'))
        except (TypeError, ValueError, UnicodeDecodeError):
            # TypeError: bad base64 on Python 2.
            # ValueError: bad base64 on Python 3, invalid JSON, or
            # non-ASCII input (UnicodeEncodeError is a ValueError).
            return False

    return True
1 change: 1 addition & 0 deletions tests/core/usage_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def test_consolidates_output_basic(self):
'ArtifactoryDetector': {},
'StripeDetector': {},
'MailchimpDetector': {},
'JwtTokenDetector': {},
}
assert not hasattr(args, 'no_private_key_scan')

Expand Down
14 changes: 14 additions & 0 deletions tests/main_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ def test_scan_string_basic(
Base64HighEntropyString: {}
BasicAuthDetector : False
HexHighEntropyString : {}
JwtTokenDetector : False
KeywordDetector : False
MailchimpDetector : False
PrivateKeyDetector : False
Expand All @@ -120,6 +121,7 @@ def test_scan_string_cli_overrides_stdin(self):
Base64HighEntropyString: False (2.585)
BasicAuthDetector : False
HexHighEntropyString : False (2.121)
JwtTokenDetector : False
KeywordDetector : False
MailchimpDetector : False
PrivateKeyDetector : False
Expand Down Expand Up @@ -254,6 +256,9 @@ def test_old_baseline_ignored_with_update_flag(
'hex_limit': 3,
'name': 'HexHighEntropyString',
},
{
'name': 'JwtTokenDetector',
},
{
'name': 'KeywordDetector',
},
Expand Down Expand Up @@ -294,6 +299,9 @@ def test_old_baseline_ignored_with_update_flag(
'hex_limit': 3,
'name': 'HexHighEntropyString',
},
{
'name': 'JwtTokenDetector',
},
{
'name': 'KeywordDetector',
},
Expand Down Expand Up @@ -387,6 +395,9 @@ def test_old_baseline_ignored_with_update_flag(
{
'name': 'BasicAuthDetector',
},
{
'name': 'JwtTokenDetector',
},
{
'name': 'MailchimpDetector',
},
Expand Down Expand Up @@ -426,6 +437,9 @@ def test_old_baseline_ignored_with_update_flag(
{
'name': 'BasicAuthDetector',
},
{
'name': 'JwtTokenDetector',
},
{
'name': 'MailchimpDetector',
},
Expand Down
46 changes: 46 additions & 0 deletions tests/plugins/jwt_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from __future__ import absolute_import

import pytest

from detect_secrets.plugins.jwt import JwtTokenDetector


class TestJwtTokenDetector(object):
    """Parametrised checks of ``JwtTokenDetector.analyze_string``."""

    @pytest.mark.parametrize(
        'payload, should_flag',
        [
            # valid jwt
            ('eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c', True),  # noqa: E501
            # valid jwt - but header contains CR/LF-s
            ('eyJ0eXAiOiJKV1QiLA0KImFsZyI6IkhTMjU2In0.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ', True),  # noqa: E501
            # valid jwt - but claims contain bunch of LF newlines
            ('eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJuYW1lIjoiSm9lIiwKInN0YXR1cyI6ImVtcGxveWVlIgp9', True),  # noqa: E501
            # valid jwt - claims contain strings with unicode accents
            ('eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IsWww6HFkcOtIMOWxZHDqcOoIiwiaWF0IjoxNTE2MjM5MDIyfQ.k5HibI_uLn_RTuPcaCNkaVaQH2y5q6GvJg8GPpGMRwQ', True),  # noqa: E501
            # as unicode literal
            (u'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c', True),  # noqa: E501
            # no signature - but still valid
            ('eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ', True),  # noqa: E501
            # decoded - invalid
            ('{"alg":"HS256","typ":"JWT"}.{"name":"Jon Doe"}.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c', False),  # noqa: E501
            # invalid json - invalid (caught by regex)
            ('bm90X3ZhbGlkX2pzb25fYXRfYWxs.bm90X3ZhbGlkX2pzb25fYXRfYWxs.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c', False),  # noqa: E501
            # missing claims - invalid
            ('eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9', False),  # noqa: E501
            # totally not a jwt
            ('jwt', False),  # noqa: E501
            # invalid json with random bytes
            ('eyJhbasdGciOiJIUaddasdasfsasdasdzI1NiIasdsInR5cCI6IkpXVCasdJasd9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c', False),  # noqa: E501
            # invalid json in jwt header - invalid (caught by parsing)
            ('eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c', False),  # noqa: E501
            # good by regex, but otherwise totally not JWT
            ('eyJAAAA.eyJBBB', False),  # noqa: E501
            ('eyJBB.eyJCC.eyJDDDD', False),  # noqa: E501
        ],
    )
    def test_analyze_string(self, payload, should_flag):
        """Exactly one result is expected when flagged, otherwise none."""
        detector = JwtTokenDetector()
        expected_count = 1 if should_flag else 0

        findings = detector.analyze_string(payload, 1, 'mock_filename')

        assert len(findings) == expected_count
3 changes: 3 additions & 0 deletions tests/pre_commit_hook_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,9 @@ def test_that_baseline_gets_updated(
'hex_limit': 3,
'name': 'HexHighEntropyString',
},
{
'name': 'JwtTokenDetector',
},
{
'name': 'KeywordDetector',
},
Expand Down