Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions filetype/types/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,10 @@ def match_document(self, buf):
return

# Loop through next 3 files and check if they match
# NOTE: OpenOffice/LibreOffice order ZIP entries differently, so also check the 4th file
# https://github.com/h2non/filetype/blob/d730d98ad5c990883148485b6fd5adbdd378364a/matchers/document.go#L134
idx = 0
for i in range(3):
for i in range(4):
# Search for next file header
idx = self.search_signature(buf, idx + 4, 6000)
if idx == -1:
Expand Down Expand Up @@ -110,7 +112,7 @@ def __init__(self):

def match(self, buf):
if len(buf) > 515 and buf[0:8] == b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1":
if buf[512:515] == b"\xEC\xA5\xC1\x00":
if buf[512:516] == b"\xEC\xA5\xC1\x00":
return True
if (
len(buf) > 2142
Expand Down
11 changes: 10 additions & 1 deletion filetype/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-

from io import BufferedIOBase
# Python 2.7 workaround
try:
import pathlib
Expand Down Expand Up @@ -48,7 +49,8 @@ def get_bytes(obj):
returning a sliced bytearray.

Args:
obj: path to readable, file-like object(with read() method), bytes, bytearray or memoryview
obj: path to readable, file-like object (with read() method), bytes,
bytearray or memoryview

Returns:
First 8192 bytes of the file content as bytearray type.
Expand All @@ -71,6 +73,13 @@ def get_bytes(obj):
if isinstance(obj, pathlib.PurePath):
return get_signature_bytes(obj)

if isinstance(obj, BufferedIOBase):
start_pos = obj.tell()
obj.seek(0)
magic_bytes = obj.read(_NUM_SIGNATURE_BYTES)
obj.seek(start_pos) # restore reader position
return get_bytes(magic_bytes)

if hasattr(obj, 'read'):
return get_bytes(obj.read(_NUM_SIGNATURE_BYTES))

Expand Down
Binary file added tests/fixtures/sample_1.doc
Binary file not shown.
Binary file added tests/fixtures/sample_1.docx
Binary file not shown.
28 changes: 18 additions & 10 deletions tests/test_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,14 @@
class TestFileType(unittest.TestCase):
def test_guess_jpeg(self):
img_path = FIXTURES + '/sample.jpg'
for obj in (img_path, open(img_path, 'rb')):
kind = filetype.guess(obj)
with open(img_path, 'rb') as fp:
for obj in (img_path, fp):
kind = filetype.guess(obj)
self.assertTrue(kind is not None)
self.assertEqual(kind.mime, 'image/jpeg')
self.assertEqual(kind.extension, 'jpg')
# guessing again from the same file handle must still work (reader position is reset)
kind = filetype.guess(fp)
self.assertTrue(kind is not None)
self.assertEqual(kind.mime, 'image/jpeg')
self.assertEqual(kind.extension, 'jpg')
Expand Down Expand Up @@ -70,16 +76,18 @@ def test_guess_zstd(self):
self.assertEqual(kind.extension, 'zst')

def test_guess_doc(self):
kind = filetype.guess(FIXTURES + '/sample.doc')
self.assertIsNotNone(kind)
self.assertEqual(kind.mime, 'application/msword')
self.assertEqual(kind.extension, 'doc')
for name in 'sample.doc', 'sample_1.doc':
kind = filetype.guess(os.path.join(FIXTURES, name))
self.assertIsNotNone(kind)
self.assertEqual(kind.mime, 'application/msword')
self.assertEqual(kind.extension, 'doc')

def test_guess_docx(self):
kind = filetype.guess(FIXTURES + '/sample.docx')
self.assertTrue(kind is not None)
self.assertEqual(kind.mime, 'application/vnd.openxmlformats-officedocument.wordprocessingml.document')
self.assertEqual(kind.extension, 'docx')
for name in 'sample.docx', 'sample_1.docx':
kind = filetype.guess(os.path.join(FIXTURES, name))
self.assertTrue(kind is not None)
self.assertEqual(kind.mime, 'application/vnd.openxmlformats-officedocument.wordprocessingml.document')
self.assertEqual(kind.extension, 'docx')

def test_guess_odt(self):
kind = filetype.guess(FIXTURES + '/sample.odt')
Expand Down
4 changes: 2 additions & 2 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ envlist = py{27,35,36,37,38,39}, lint, doc, clean
skip_missing_interpreters = true

[testenv:test]
deps = pytest-benchmark
commands = pytest
deps = pytest
commands = pytest --ignore=tests/test_benchmark.py

[testenv:lint]
basepython = python3
Expand Down