-
Notifications
You must be signed in to change notification settings - Fork 482
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'feature/adding-gibberish-detector'
- Loading branch information
Showing
22 changed files
with
337 additions
and
204 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
include detect_secrets/filters/gibberish/rfc.model |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
from . import allowlist # noqa: F401 | ||
from . import gibberish # noqa: F401 | ||
from . import heuristic # noqa: F401 | ||
from . import regex # noqa: F401 | ||
from . import wordlist # noqa: F401 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
import os | ||
import string | ||
from functools import lru_cache | ||
from typing import Any | ||
from typing import Optional | ||
|
||
from ...core.plugins import Plugin | ||
from ...plugins.private_key import PrivateKeyDetector | ||
from ...settings import get_settings | ||
from ..util import compute_file_hash | ||
|
||
|
||
Model = Any | ||
|
||
|
||
def is_feature_enabled() -> bool:
    """
    Report whether the gibberish filter can be used.

    Building the model imports `gibberish_detector`; if that import fails,
    the feature is considered disabled.
    """
    try:
        get_model()
    except ImportError:
        return False
    else:
        return True
|
||
|
||
def initialize(model_path: Optional[str] = None, limit: float = 3.7) -> None:
    """
    Load a serialized gibberish model into the shared model instance, and
    register this filter (together with its configuration) in the settings.

    :param model_path: path to a serialized model. When omitted, the `rfc.model`
        file located in this package's directory is used instead.
    :param limit: this limit was obtained through trial and error. Check out
        the original pull request for rationale.
    :raises: ValueError if the model file cannot be parsed.
    """
    source_path = model_path or os.path.join(__path__[0], 'rfc.model')

    model = get_model()

    from gibberish_detector import serializer
    from gibberish_detector.exceptions import ParsingError

    # Read first, parse afterwards: the file handle is released before any
    # parsing work, and the `try` only covers the call that can actually
    # raise `ParsingError`.
    with open(source_path) as f:
        serialized_model = f.read()

    try:
        model.update(serializer.deserialize(serialized_model))
    except ParsingError as e:
        # Chain explicitly so the underlying parsing failure stays visible
        # as the direct cause in the traceback.
        raise ValueError('Invalid model.') from e

    config = {
        'limit': limit,
    }
    if model_path:
        # Only a caller-supplied model is recorded in the configuration; the
        # bundled default is implied when these keys are absent.
        config['model'] = model_path
        config['file_hash'] = compute_file_hash(model_path)

    filter_key = f'{__name__}.should_exclude_secret'
    get_settings().filters[filter_key] = config
|
||
|
||
def should_exclude_secret(secret: str, plugin: Optional[Plugin] = None) -> bool:
    """
    Decide whether `secret` should be excluded because the gibberish detector
    does not consider it gibberish.

    :param plugin: optional, for easier testing. The dependency injection system
        will populate its proper value on complete runs.
    :raises: AssertionError if the model has no data or no charset yet.
    """
    # Private keys are actual words, so they will be a false negative.
    if isinstance(plugin, PrivateKeyDetector):
        return False

    # Through real-life experimentation, we discovered that the gibberish detector
    # works best with non-hex strings, since hex strings have a too limited charset
    # to fit our trained models. As such, we cannot make a deterministic decision
    # in such cases.
    hex_like_chars = set(string.hexdigits + '-')
    if set(secret) <= hex_like_chars:
        return False

    # `get_model` is cached, so a single lookup yields the same shared instance.
    model = get_model()
    if not (model.data and model.charset):
        raise AssertionError('Attempting to use uninitialized gibberish model.')

    from gibberish_detector.detector import Detector

    filter_key = f'{__name__}.should_exclude_secret'
    threshold = get_settings().filters[filter_key]['limit']
    detector = Detector(model=model, threshold=threshold)

    # TODO: secret.lower() is only used currently, since the default model is only
    # trained with lower case letters. However, in the future, if people want to train
    # a model that is case-sensitive, we can figure out how to change this.
    # Unfortunately, it's not straight-forward to just remove the `.lower()` function call,
    # since if the string is *not* lowered (and the model expects it to be), the results
    # will be quite different.
    lowered = secret.lower()
    return not detector.is_gibberish(lowered)
|
||
|
||
@lru_cache(maxsize=1)
def get_model() -> 'Model':
    """
    Return the shared gibberish model.

    The result is cached, so repeated calls hand back the same instance. It is
    created with an empty charset. The import is done inside the function, so
    an ImportError from `gibberish_detector` is raised at call time.
    """
    import gibberish_detector.model as model_module

    return model_module.Model(charset='')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"charset": "abcdefghijklmnopqrstuvwxyz", "ngram_size": 2, "counts": {"a": {"a": 118635, "b": 630875, "c": 1501150, "d": 938413, "e": 64942, "f": 203534, "g": 892717, "h": 50302, "i": 629589, "j": 16393, "k": 122527, "l": 2134199, "m": 865412, "n": 3466183, "o": 39385, "p": 708687, "q": 15378, "r": 1990681, "s": 1639856, "t": 4062626, "u": 390482, "v": 290066, "w": 65833, "x": 148257, "y": 497074, "z": 9679}, "b": {"a": 299747, "b": 38842, "c": 66484, "d": 26575, "e": 1340130, "f": 29026, "g": 32212, "h": 8693, "i": 355357, "j": 188222, "k": 6350, "l": 528022, "m": 42838, "n": 36585, "o": 271797, "p": 20216, "q": 1650, "r": 143544, "s": 134924, "t": 55332, "u": 282480, "v": 8513, "w": 9195, "x": 4962, "y": 350774, "z": 1991}, "c": {"a": 1583493, "b": 55072, "c": 366166, "d": 99397, "e": 1680049, "f": 61790, "g": 22751, "h": 1138161, "i": 647914, "j": 23415, "k": 608337, "l": 409128, "m": 129894, "n": 63692, "o": 2266996, "p": 202574, "q": 8192, "r": 525914, "s": 234381, "t": 1400393, "u": 582740, "v": 25213, "w": 27222, "x": 12164, "y": 94929, "z": 3613}, "d": {"a": 1102196, "b": 395535, "c": 205472, "d": 521902, "e": 2349496, "f": 230444, "g": 90095, "h": 107584, "i": 1575337, "j": 30840, "k": 23781, "l": 151769, "m": 188851, "n": 183549, "o": 773165, "p": 252828, "q": 10114, "r": 490139, "s": 679426, "t": 677199, "u": 321170, "v": 101547, "w": 168753, "x": 13779, "y": 59907, "z": 4936}, "e": {"a": 1743853, "b": 293141, "c": 2504452, "d": 3070491, "e": 835273, "f": 950803, "g": 431308, "h": 176469, "i": 1177449, "j": 38230, "k": 70232, "l": 1227191, "m": 1322498, "n": 4007948, "o": 705920, "p": 939044, "q": 450185, "r": 5234236, "s": 4637943, "t": 2734401, "u": 284887, "v": 537744, "w": 395249, "x": 721758, "y": 247129, "z": 15923}, "f": {"a": 482902, "b": 46301, "c": 776635, "d": 74453, "e": 459377, "f": 324747, "g": 15818, "h": 24876, "i": 1326013, "j": 5135, "k": 7786, "l": 154253, "m": 68036, "n": 47090, "o": 1525315, "p": 71256, "q": 7352, "r": 399118, "s": 
158690, "t": 743932, "u": 192428, "v": 25544, "w": 23974, "x": 11395, "y": 80258, "z": 5178}, "g": {"a": 374124, "b": 43000, "c": 75143, "d": 54891, "e": 1210541, "f": 61951, "g": 89766, "h": 265348, "i": 434656, "j": 9650, "k": 8369, "l": 111072, "m": 141157, "n": 262172, "o": 233776, "p": 126045, "q": 3985, "r": 368752, "s": 199803, "t": 394592, "u": 242465, "v": 16518, "w": 40581, "x": 7464, "y": 46646, "z": 4033}, "h": {"a": 1557099, "b": 48781, "c": 89817, "d": 47268, "e": 4944439, "f": 32193, "g": 12079, "h": 20750, "i": 1105135, "j": 3953, "k": 9651, "l": 32170, "m": 125637, "n": 76152, "o": 775525, "p": 47687, "q": 3047, "r": 145761, "s": 83710, "t": 413869, "u": 73213, "v": 16513, "w": 22152, "x": 4223, "y": 37868, "z": 6960}, "i": {"a": 545417, "b": 441765, "c": 1649054, "d": 823145, "e": 1026129, "f": 883435, "g": 624619, "h": 16297, "i": 47452, "j": 11907, "k": 56496, "l": 788318, "m": 672818, "n": 5188085, "o": 2696322, "p": 641038, "q": 31721, "r": 507827, "s": 2474217, "t": 2145858, "u": 27798, "v": 521907, "w": 8937, "x": 103556, "y": 3155, "z": 178043}, "j": {"a": 65309, "b": 5150, "c": 4630, "d": 3609, "e": 207116, "f": 2967, "g": 2140, "h": 3674, "i": 12565, "j": 1966, "k": 4624, "l": 3278, "m": 6958, "n": 5271, "o": 59640, "p": 7497, "q": 1402, "r": 4799, "s": 15013, "t": 4209, "u": 80334, "v": 2082, "w": 6849, "x": 1481, "y": 1382, "z": 1520}, "k": {"a": 91393, "b": 24180, "c": 35534, "d": 25923, "e": 557437, "f": 32487, "g": 8856, "h": 16581, "i": 173899, "j": 5028, "k": 9027, "l": 40471, "m": 36600, "n": 89335, "o": 53389, "p": 133808, "q": 2485, "r": 28645, "s": 177604, "t": 62173, "u": 27442, "v": 5748, "w": 27477, "x": 2830, "y": 6013, "z": 1805}, "l": {"a": 929370, "b": 103217, "c": 139662, "d": 512423, "e": 1824337, "f": 113308, "g": 78226, "h": 36656, "i": 1450931, "j": 14335, "k": 24164, "l": 983903, "m": 107923, "n": 110976, "o": 905778, "p": 211582, "q": 5416, "r": 114840, "s": 608836, "t": 453915, "u": 460820, "v": 96718, "w": 
49654, "x": 6631, "y": 577831, "z": 11631}, "m": {"a": 1635769, "b": 287867, "c": 67799, "d": 50279, "e": 2204807, "f": 38281, "g": 16738, "h": 16864, "i": 672936, "j": 6422, "k": 6652, "l": 90860, "m": 268828, "n": 40050, "o": 527359, "p": 791104, "q": 3281, "r": 47377, "s": 279178, "t": 188486, "u": 454062, "v": 16237, "w": 25264, "x": 6360, "y": 9965, "z": 2639}, "n": {"a": 1538477, "b": 200065, "c": 997362, "d": 2330762, "e": 1952162, "f": 625660, "g": 1940478, "h": 83259, "i": 1015263, "j": 28397, "k": 150610, "l": 214383, "m": 226678, "n": 356573, "o": 1367298, "p": 210677, "q": 9545, "r": 232943, "s": 1723024, "t": 4051859, "u": 306661, "v": 137961, "w": 115060, "x": 18770, "y": 191590, "z": 12659}, "o": {"a": 333611, "b": 507058, "c": 1030631, "d": 621611, "e": 154901, "f": 1509661, "g": 182750, "h": 53343, "i": 324137, "j": 11882, "k": 87654, "l": 804704, "m": 994391, "n": 4575413, "o": 219213, "p": 683677, "q": 4149, "r": 3464564, "s": 641767, "t": 1406855, "u": 1149771, "v": 410511, "w": 561073, "x": 56638, "y": 39476, "z": 8869}, "p": {"a": 1189585, "b": 42076, "c": 124961, "d": 126398, "e": 1242826, "f": 80409, "g": 17643, "h": 137438, "i": 266019, "j": 9649, "k": 35379, "l": 828131, "m": 106979, "n": 68507, "o": 908296, "p": 557551, "q": 4360, "r": 1408275, "s": 295854, "t": 588109, "u": 216552, "v": 162392, "w": 69550, "x": 9971, "y": 42770, "z": 2061}, "q": {"a": 5362, "b": 2396, "c": 3695, "d": 4903, "e": 2437, "f": 2492, "g": 1606, "h": 1937, "i": 6228, "j": 1178, "k": 1502, "l": 2222, "m": 5600, "n": 6138, "o": 16223, "p": 3612, "q": 2178, "r": 4314, "s": 7449, "t": 4457, "u": 523091, "v": 2218, "w": 1957, "x": 1602, "y": 1593, "z": 1147}, "r": {"a": 1884150, "b": 108467, "c": 469520, "d": 619333, "e": 4613547, "f": 928117, "g": 206040, "h": 70002, "i": 1924644, "j": 14216, "k": 336855, "l": 161805, "m": 782008, "n": 396973, "o": 2026650, "p": 279715, "q": 9201, "r": 543892, "s": 939303, "t": 1323019, "u": 276419, "v": 443167, "w": 154662, "x": 
13149, "y": 462288, "z": 6346}, "s": {"a": 1571556, "b": 200500, "c": 850779, "d": 386238, "e": 3128653, "f": 376110, "g": 84160, "h": 508213, "i": 1940017, "j": 27903, "k": 82209, "l": 194818, "m": 459368, "n": 353397, "o": 1116229, "p": 919294, "q": 11941, "r": 342864, "s": 1854672, "t": 3581542, "u": 824325, "v": 78034, "w": 307467, "x": 24837, "y": 215803, "z": 8336}, "t": {"a": 2239522, "b": 240284, "c": 445118, "d": 188710, "e": 3539356, "f": 299799, "g": 48582, "h": 6502699, "i": 4442428, "j": 13452, "k": 25898, "l": 277765, "m": 287203, "n": 183293, "o": 2396902, "p": 414605, "q": 8754, "r": 1552404, "s": 1302370, "t": 1061425, "u": 546833, "v": 47746, "w": 473638, "x": 30926, "y": 739526, "z": 16025}, "u": {"a": 254536, "b": 217707, "c": 270253, "d": 169700, "e": 638912, "f": 36218, "g": 117660, "h": 6047, "i": 258834, "j": 3202, "k": 8410, "l": 739209, "m": 462935, "n": 660975, "o": 17845, "p": 395636, "q": 1456, "r": 975780, "s": 1272647, "t": 937723, "u": 9229, "v": 4979, "w": 4665, "x": 7779, "y": 2786, "z": 3638}, "v": {"a": 553164, "b": 9641, "c": 37594, "d": 16968, "e": 1438129, "f": 10523, "g": 4867, "h": 8538, "i": 626442, "j": 5603, "k": 2505, "l": 16039, "m": 21836, "n": 13803, "o": 81911, "p": 80950, "q": 2070, "r": 27617, "s": 33989, "t": 22994, "u": 9415, "v": 8120, "w": 6624, "x": 3053, "y": 3050, "z": 1843}, "w": {"a": 328181, "b": 12472, "c": 24164, "d": 16960, "e": 296182, "f": 14279, "g": 12677, "h": 420451, "i": 733532, "j": 2434, "k": 6230, "l": 49180, "m": 17167, "n": 100428, "o": 450177, "p": 19437, "q": 1897, "r": 77674, "s": 115640, "t": 62041, "u": 7219, "v": 6072, "w": 98652, "x": 2572, "y": 4494, "z": 1484}, "x": {"a": 195222, "b": 15941, "c": 99442, "d": 23958, "e": 57299, "f": 23622, "g": 4359, "h": 9176, "i": 121876, "j": 3221, "k": 2420, "l": 12061, "m": 60813, "n": 10788, "o": 24409, "p": 147454, "q": 1549, "r": 18376, "s": 56593, "t": 281697, "u": 8440, "v": 6979, "w": 6973, "x": 34004, "y": 28368, "z": 1965}, "y": {"a": 
299426, "b": 136713, "c": 149196, "d": 90086, "e": 172967, "f": 77444, "g": 22164, "h": 43433, "i": 236429, "j": 8732, "k": 13359, "l": 81444, "m": 131819, "n": 169093, "o": 195258, "p": 444042, "q": 5482, "r": 140869, "s": 328005, "t": 375785, "u": 48208, "v": 23601, "w": 81979, "x": 4858, "y": 8417, "z": 7340}, "z": {"a": 76890, "b": 2486, "c": 3088, "d": 2886, "e": 144164, "f": 1805, "g": 1705, "h": 9255, "i": 15379, "j": 1740, "k": 1506, "l": 3047, "m": 2930, "n": 2134, "o": 21144, "p": 2423, "q": 1128, "r": 7939, "s": 5601, "t": 3439, "u": 2997, "v": 2471, "w": 2121, "x": 1812, "y": 2670, "z": 4454}}} |
Oops, something went wrong.