Skip to content

Commit

Permalink
Fix confidence scoring flaw
Browse files Browse the repository at this point in the history
  • Loading branch information
cdgriffith committed Jun 18, 2020
1 parent 3228b84 commit 81312fd
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 15 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
Changelog
=========

Version 1.10
------------

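- Fixing how confidence works: it is now scored on the length of the matched byte signature instead of the length of the file extension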

Version 1.9
-----------

Expand Down
6 changes: 3 additions & 3 deletions puremagic/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from collections import namedtuple

__author__ = "Chris Griffith"
__version__ = "1.9"
__version__ = "1.10"
__all__ = [
"magic_file",
"magic_string",
Expand Down Expand Up @@ -79,8 +79,8 @@ def _confidence(matches, ext=None):
""" Rough confidence based on string length and file extension"""
results = []
for match in matches:
con = 0.8 if len(match.extension) > 9 else float("0.{0}".format(len(match.extension)))
if ext == match.extension:
con = 0.8 if len(match.byte_match) > 9 else float("0.{0}".format(len(match.byte_match)))
if ext and ext == match.extension:
con = 0.9
results.append(PureMagicWithConfidence(confidence=con, **match._asdict()))
return sorted(results, key=lambda x: x.confidence, reverse=True)
Expand Down
12 changes: 0 additions & 12 deletions test/test_common_extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,18 +68,6 @@ def test_string(self):
ext = puremagic.from_string(bytes(self.mp4magic))
self.assertEqual(self.expect_ext, ext)

def test_string_with_filename_hint(self):
"""String identification with filename hint """
filename = os.path.join(OFFICE_DIR, "test.xlsx")
with open(filename, "rb") as f:
data = f.read()
ext = puremagic.from_string(data)
# .docx and .xlsx have same signature
self.assertEqual(".docx", ext)
# with the hint from_string() should find the correct extension
ext = puremagic.from_string(data, filename=filename)
self.assertEqual(".xlsx", ext)

def test_string_with_confidence(self):
"""String identification: magic_string """
ext = puremagic.magic_string(bytes(self.mp4magic))
Expand Down

0 comments on commit 81312fd

Please sign in to comment.