Skip to content

Commit

Permalink
Merge pull request #446 from christian-intra2net/unittest-fixes
Browse files Browse the repository at this point in the history
Unittest fixes, reduce output
  • Loading branch information
decalage2 committed May 20, 2019
2 parents efa387d + f3dbed9 commit d138bff
Show file tree
Hide file tree
Showing 11 changed files with 193 additions and 119 deletions.
3 changes: 2 additions & 1 deletion oletools/crypto.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,8 @@ def is_encrypted(some_file):
if zipfile.is_zipfile(some_file):
return _is_encrypted_zip(some_file)
# otherwise assume it is the name of an ole file
return _is_encrypted_ole(OleFileIO(some_file))
with OleFileIO(some_file) as ole:
return _is_encrypted_ole(ole)
except Exception as exc:
log.warning('Failed to check {} for encryption ({}); assume it is not '
'encrypted.'.format(some_file, exc))
Expand Down
37 changes: 22 additions & 15 deletions oletools/msodde.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,17 +493,23 @@ def process_xls(filepath):
""" find dde links in excel ole file """

result = []
for stream in xls_parser.XlsFile(filepath).iter_streams():
if not isinstance(stream, xls_parser.WorkbookStream):
continue
for record in stream.iter_records():
if not isinstance(record, xls_parser.XlsRecordSupBook):
xls_file = None
try:
xls_file = xls_parser.XlsFile(filepath)
for stream in xls_file.iter_streams():
if not isinstance(stream, xls_parser.WorkbookStream):
continue
if record.support_link_type in (
xls_parser.XlsRecordSupBook.LINK_TYPE_OLE_DDE,
xls_parser.XlsRecordSupBook.LINK_TYPE_EXTERNAL):
result.append(record.virt_path.replace(u'\u0003', u' '))
return u'\n'.join(result)
for record in stream.iter_records():
if not isinstance(record, xls_parser.XlsRecordSupBook):
continue
if record.support_link_type in (
xls_parser.XlsRecordSupBook.LINK_TYPE_OLE_DDE,
xls_parser.XlsRecordSupBook.LINK_TYPE_EXTERNAL):
result.append(record.virt_path.replace(u'\u0003', u' '))
return u'\n'.join(result)
finally:
if xls_file is not None:
xls_file.close()


def process_docx(filepath, field_filter_mode=None):
Expand Down Expand Up @@ -908,13 +914,12 @@ def process_file(filepath, field_filter_mode=None):
if xls_parser.is_xls(filepath):
logger.debug('Process file as excel 2003 (xls)')
return process_xls(filepath)

ole = olefile.OleFileIO(filepath, path_encoding=None)
if is_ppt(ole):
if is_ppt(filepath):
logger.debug('is ppt - cannot have DDE')
return u''
logger.debug('Process file as word 2003 (doc)')
return process_doc(ole)
with olefile.OleFileIO(filepath, path_encoding=None) as ole:
return process_doc(ole)

with open(filepath, 'rb') as file_handle:
if file_handle.read(4) == RTF_START:
Expand Down Expand Up @@ -970,6 +975,7 @@ def process_maybe_encrypted(filepath, passwords=None, crypto_nesting=0,
if not crypto.is_encrypted(filepath):
return result
except Exception:
logger.debug('Ignoring exception:', exc_info=True)
if not crypto.is_encrypted(filepath):
raise

Expand Down Expand Up @@ -997,7 +1003,8 @@ def process_maybe_encrypted(filepath, passwords=None, crypto_nesting=0,
try: # (maybe file was not yet created)
os.unlink(decrypted_file)
except Exception:
pass
logger.debug('Ignoring exception closing decrypted file:',
exc_info=True)
return result


Expand Down
52 changes: 29 additions & 23 deletions oletools/oleobj.py
Original file line number Diff line number Diff line change
Expand Up @@ -526,29 +526,35 @@ def find_ole_in_ppt(filename):
can contain the actual embedded file we are looking for (caller will check
for these).
"""
for stream in PptFile(filename).iter_streams():
for record_idx, record in enumerate(stream.iter_records()):
if isinstance(record, PptRecordExOleVbaActiveXAtom):
ole = None
try:
data_start = next(record.iter_uncompressed())
if data_start[:len(olefile.MAGIC)] != olefile.MAGIC:
continue # could be an ActiveX control or VBA Storage

# otherwise, this should be an OLE object
log.debug('Found record with embedded ole object in ppt '
'(stream "{0}", record no {1})'
.format(stream.name, record_idx))
ole = record.get_data_as_olefile()
yield ole
except IOError:
log.warning('Error reading data from {0} stream or '
'interpreting it as OLE object'
.format(stream.name))
log.debug('', exc_info=True)
finally:
if ole is not None:
ole.close()
ppt_file = None
try:
ppt_file = PptFile(filename)
for stream in ppt_file.iter_streams():
for record_idx, record in enumerate(stream.iter_records()):
if isinstance(record, PptRecordExOleVbaActiveXAtom):
ole = None
try:
data_start = next(record.iter_uncompressed())
if data_start[:len(olefile.MAGIC)] != olefile.MAGIC:
continue # could be ActiveX control / VBA Storage

# otherwise, this should be an OLE object
log.debug('Found record with embedded ole object in '
'ppt (stream "{0}", record no {1})'
.format(stream.name, record_idx))
ole = record.get_data_as_olefile()
yield ole
except IOError:
log.warning('Error reading data from {0} stream or '
'interpreting it as OLE object'
.format(stream.name))
log.debug('', exc_info=True)
finally:
if ole is not None:
ole.close()
finally:
if ppt_file is not None:
ppt_file.close()


class FakeFile(io.RawIOBase):
Expand Down
16 changes: 9 additions & 7 deletions oletools/ppt_record_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@
sys.path.insert(0, PARENT_DIR)
del PARENT_DIR
from oletools import record_base
from oletools.common.errors import CryptoErrorBase


# types of relevant records (there are much more than listed here)
Expand Down Expand Up @@ -109,10 +108,11 @@
])


# record types where version is not 0x0 or 0xf
# record types where version is not 0x0 or 0x1 or 0xf
VERSION_EXCEPTIONS = dict([
(0x0400, 2), # rt_vbainfoatom
(0x03ef, 2), # rt_slideatom
(0xe9c7, 7), # tests/test-data/encrypted/encrypted.ppt, not investigated
])


Expand Down Expand Up @@ -174,7 +174,7 @@ def is_ppt(filename):
for record in stream.iter_records():
if record.type == 0x0ff5: # UserEditAtom
have_user_edit = True
elif record.type == 0x1772: # PersisDirectoryAtom
elif record.type == 0x1772: # PersistDirectoryAtom
have_persist_dir = True
elif record.type == 0x03e8: # DocumentContainer
have_document_container = True
Expand All @@ -185,10 +185,12 @@ def is_ppt(filename):
return True
else: # ignore other streams/storages since they are optional
continue
except CryptoErrorBase:
raise
except Exception:
pass
except Exception as exc:
logging.debug('Ignoring exception in is_ppt, assume is not ppt',
exc_info=True)
finally:
if ppt_file is not None:
ppt_file.close()
return False


Expand Down
10 changes: 8 additions & 2 deletions oletools/xls_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,18 @@ def is_xls(filename):
substream.
See also: oleid.OleID.check_excel
"""
xls_file = None
try:
for stream in XlsFile(filename).iter_streams():
xls_file = XlsFile(filename)
for stream in xls_file.iter_streams():
if isinstance(stream, WorkbookStream):
return True
except Exception:
pass
logging.debug('Ignoring exception in is_xls, assume is not xls',
exc_info=True)
finally:
if xls_file is not None:
xls_file.close()
return False


Expand Down
104 changes: 65 additions & 39 deletions tests/msodde/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,14 @@
from __future__ import print_function

import unittest
from oletools import msodde
from tests.test_utils import DATA_BASE_DIR as BASE_DIR
import sys
import os
from os.path import join
from traceback import print_exc
from oletools import msodde
from oletools.crypto import \
WrongEncryptionPassword, CryptoLibNotImported, check_msoffcrypto
from tests.test_utils import DATA_BASE_DIR as BASE_DIR


class TestReturnCode(unittest.TestCase):
Expand Down Expand Up @@ -46,15 +49,21 @@ def test_valid_xml(self):

def test_invalid_none(self):
""" check that no file argument leads to non-zero exit status """
self.do_test_validity('', True)
if sys.hexversion > 0x03030000: # version 3.3 and higher
# different errors probably depending on whether msoffcrypto is
# available or not
expect_error = (AttributeError, FileNotFoundError)
else:
expect_error = (AttributeError, IOError)
self.do_test_validity('', expect_error)

def test_invalid_empty(self):
""" check that empty file argument leads to non-zero exit status """
self.do_test_validity(join(BASE_DIR, 'basic/empty'), True)
self.do_test_validity(join(BASE_DIR, 'basic/empty'), Exception)

def test_invalid_text(self):
""" check that text file argument leads to non-zero exit status """
self.do_test_validity(join(BASE_DIR, 'basic/text'), True)
self.do_test_validity(join(BASE_DIR, 'basic/text'), Exception)

def test_encrypted(self):
"""
Expand All @@ -64,28 +73,38 @@ def test_encrypted(self):
Encryption) is tested.
"""
CRYPT_DIR = join(BASE_DIR, 'encrypted')
ADD_ARGS = '', '-j', '-d', '-f', '-a'
have_crypto = check_msoffcrypto()
for filename in os.listdir(CRYPT_DIR):
full_name = join(CRYPT_DIR, filename)
for args in ADD_ARGS:
self.do_test_validity(args + ' ' + full_name, True)

def do_test_validity(self, args, expect_error=False):
""" helper for test_valid_doc[x] """
have_exception = False
if have_crypto and 'standardpassword' in filename:
# these are automagically decrypted
self.do_test_validity(join(CRYPT_DIR, filename))
elif have_crypto:
self.do_test_validity(join(CRYPT_DIR, filename),
WrongEncryptionPassword)
else:
self.do_test_validity(join(CRYPT_DIR, filename),
CryptoLibNotImported)

def do_test_validity(self, filename, expect_error=None):
""" helper for test_[in]valid_* """
found_error = None
# DEBUG: print('Testing file {}'.format(filename))
try:
msodde.process_file(args, msodde.FIELD_FILTER_BLACKLIST)
except Exception:
have_exception = True
print_exc()
except SystemExit as exc: # sys.exit() was called
have_exception = True
if exc.code is None:
have_exception = False

self.assertEqual(expect_error, have_exception,
msg='Args={0}, expect={1}, exc={2}'
.format(args, expect_error, have_exception))
msodde.process_maybe_encrypted(filename,
field_filter_mode=msodde.FIELD_FILTER_BLACKLIST)
except Exception as exc:
found_error = exc
# DEBUG: print_exc()

if expect_error and not found_error:
self.fail('Expected {} but msodde finished without errors for {}'
.format(expect_error, filename))
elif not expect_error and found_error:
self.fail('Unexpected error {} from msodde for {}'
.format(found_error, filename))
elif expect_error and not isinstance(found_error, expect_error):
self.fail('Wrong kind of error {} from msodde for {}, expected {}'
.format(type(found_error), filename, expect_error))


class TestDdeLinks(unittest.TestCase):
Expand All @@ -100,33 +119,37 @@ def get_dde_from_output(output):
def test_with_dde(self):
""" check that dde links appear on stdout """
filename = 'dde-test-from-office2003.doc'
output = msodde.process_file(
join(BASE_DIR, 'msodde', filename), msodde.FIELD_FILTER_BLACKLIST)
output = msodde.process_maybe_encrypted(
join(BASE_DIR, 'msodde', filename),
field_filter_mode=msodde.FIELD_FILTER_BLACKLIST)
self.assertNotEqual(len(self.get_dde_from_output(output)), 0,
msg='Found no dde links in output of ' + filename)

def test_no_dde(self):
""" check that no dde links appear on stdout """
filename = 'harmless-clean.doc'
output = msodde.process_file(
join(BASE_DIR, 'msodde', filename), msodde.FIELD_FILTER_BLACKLIST)
output = msodde.process_maybe_encrypted(
join(BASE_DIR, 'msodde', filename),
field_filter_mode=msodde.FIELD_FILTER_BLACKLIST)
self.assertEqual(len(self.get_dde_from_output(output)), 0,
msg='Found dde links in output of ' + filename)

def test_with_dde_utf16le(self):
""" check that dde links appear on stdout """
filename = 'dde-test-from-office2013-utf_16le-korean.doc'
output = msodde.process_file(
join(BASE_DIR, 'msodde', filename), msodde.FIELD_FILTER_BLACKLIST)
output = msodde.process_maybe_encrypted(
join(BASE_DIR, 'msodde', filename),
field_filter_mode=msodde.FIELD_FILTER_BLACKLIST)
self.assertNotEqual(len(self.get_dde_from_output(output)), 0,
msg='Found no dde links in output of ' + filename)

def test_excel(self):
""" check that dde links are found in excel 2007+ files """
expect = ['cmd /c calc.exe', ]
for extn in 'xlsx', 'xlsm', 'xlsb':
output = msodde.process_file(
join(BASE_DIR, 'msodde', 'dde-test.' + extn), msodde.FIELD_FILTER_BLACKLIST)
output = msodde.process_maybe_encrypted(
join(BASE_DIR, 'msodde', 'dde-test.' + extn),
field_filter_mode=msodde.FIELD_FILTER_BLACKLIST)

self.assertEqual(expect, self.get_dde_from_output(output),
msg='unexpected output for dde-test.{0}: {1}'
Expand All @@ -136,8 +159,9 @@ def test_xml(self):
""" check that dde in xml from word / excel is found """
for name_part in 'excel2003', 'word2003', 'word2007':
filename = 'dde-in-' + name_part + '.xml'
output = msodde.process_file(
join(BASE_DIR, 'msodde', filename), msodde.FIELD_FILTER_BLACKLIST)
output = msodde.process_maybe_encrypted(
join(BASE_DIR, 'msodde', filename),
field_filter_mode=msodde.FIELD_FILTER_BLACKLIST)
links = self.get_dde_from_output(output)
self.assertEqual(len(links), 1, 'found {0} dde-links in {1}'
.format(len(links), filename))
Expand All @@ -149,15 +173,17 @@ def test_xml(self):
def test_clean_rtf_blacklist(self):
""" find a lot of hyperlinks in rtf spec """
filename = 'RTF-Spec-1.7.rtf'
output = msodde.process_file(
join(BASE_DIR, 'msodde', filename), msodde.FIELD_FILTER_BLACKLIST)
output = msodde.process_maybe_encrypted(
join(BASE_DIR, 'msodde', filename),
field_filter_mode=msodde.FIELD_FILTER_BLACKLIST)
self.assertEqual(len(self.get_dde_from_output(output)), 1413)

def test_clean_rtf_ddeonly(self):
""" find no dde links in rtf spec """
filename = 'RTF-Spec-1.7.rtf'
output = msodde.process_file(
join(BASE_DIR, 'msodde', filename), msodde.FIELD_FILTER_DDE)
output = msodde.process_maybe_encrypted(
join(BASE_DIR, 'msodde', filename),
field_filter_mode=msodde.FIELD_FILTER_DDE)
self.assertEqual(len(self.get_dde_from_output(output)), 0,
msg='Found dde links in output of ' + filename)

Expand Down
Loading

0 comments on commit d138bff

Please sign in to comment.