From 5cb6074ab25d97832f97e88e6c6648dc61af8829 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Gonz=C3=A1lez=20Lopes?=
Date: Tue, 14 May 2019 19:49:38 +0000
Subject: [PATCH] Improve handle of un-scannable files
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Daniel González Lopes

Fix style

Signed-off-by: Daniel González Lopes

Improve style

Signed-off-by: Daniel González Lopes
---
 detect_secrets/core/constants.py          | 28 +++++++++++++++++++++++
 detect_secrets/core/secrets_collection.py |  4 +++-
 2 files changed, 31 insertions(+), 1 deletion(-)
 create mode 100644 detect_secrets/core/constants.py

diff --git a/detect_secrets/core/constants.py b/detect_secrets/core/constants.py
new file mode 100644
index 000000000..4e80dab49
--- /dev/null
+++ b/detect_secrets/core/constants.py
@@ -0,0 +1,28 @@
+# We don't scan files with these extensions (lowercase, leading dot kept,
+# so they compare directly with `os.path.splitext(filename)[1].lower()`).
+# NOTE: We might be able to do this better with
+#       `subprocess.check_output(['file', filename])`
+#       and look for "ASCII text", but that might be more expensive.
+# Definitely something to look into, if this list gets unwieldy.
+IGNORED_FILE_EXTENSIONS = {
+    '.7z',
+    '.bmp',
+    '.bz2',
+    '.dmg',
+    '.exe',
+    '.gif',
+    '.gz',
+    '.ico',
+    '.jar',
+    '.jpg',
+    '.jpeg',
+    '.png',
+    '.rar',
+    '.realm',
+    '.s7z',
+    '.tar',
+    '.tif',
+    '.tiff',
+    '.webp',
+    '.zip',
+}
diff --git a/detect_secrets/core/secrets_collection.py b/detect_secrets/core/secrets_collection.py
index f033944be..99972aed6 100644
--- a/detect_secrets/core/secrets_collection.py
+++ b/detect_secrets/core/secrets_collection.py
@@ -8,6 +8,7 @@ from time import strftime
 
 
 from detect_secrets import VERSION
+from detect_secrets.core.constants import IGNORED_FILE_EXTENSIONS
 from detect_secrets.core.log import log
 from detect_secrets.core.potential_secret import PotentialSecret
 from detect_secrets.plugins.common import initialize
@@ -200,7 +201,8 @@ def scan_file(self, filename, filename_key=None):
 
         if os.path.islink(filename):
             return False
-
+        if os.path.splitext(filename)[1].lower() in IGNORED_FILE_EXTENSIONS:
+            return False
         try:
             with codecs.open(filename, encoding='utf-8') as f:
                 self._extract_secrets_from_file(f, filename_key)