Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Detect interactive ppt features #786

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions oletools/oleobj.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
import re
import sys
import io
from zipfile import is_zipfile
from zipfile import BadZipFile, is_zipfile
import random

import olefile
Expand All @@ -70,7 +70,7 @@
from oletools.thirdparty import xglob
from oletools.ppt_record_parser import (is_ppt, PptFile,
PptRecordExOleVbaActiveXAtom)
from oletools.ooxml import XmlParser
from oletools.ooxml import XmlParser, BadOOXML
from oletools.common.io_encoding import ensure_stdout_handles_unicode

# -----------------------------------------------------------------------------
Expand Down Expand Up @@ -578,7 +578,7 @@ def get_sane_embedded_filenames(filename, src_path, tmp_path, max_len,

# identify suffix. Dangerous suffixes are all short
idx = candidate.rfind('.')
if idx is -1:
if idx == -1:
candidates_without_suffix.append(candidate)
continue
elif idx < len(candidate)-5:
Expand Down Expand Up @@ -866,7 +866,7 @@ def process_file(filename, data, output_dir=None):
did_dump = False

xml_parser = None
if is_zipfile(filename):
try: # do not trust is_zipfile, can easily be fooled
log.info('file could be an OOXML file, looking for relationships with '
'external links')
xml_parser = XmlParser(filename)
Expand All @@ -878,6 +878,10 @@ def process_file(filename, data, output_dir=None):
for target in find_customUI(xml_parser):
did_dump = True
print("Found customUI tag with external link or VBA macro %s (possibly exploiting CVE-2021-42292)" % target)
except (BadZipFile, BadOOXML, UnicodeDecodeError):
log.debug("", exc_info=True)
log.info("Not an OOXML file after all")


# look for ole files inside file (e.g. unzip docx)
# have to finish work on every ole stream inside iteration, since handles
Expand Down
107 changes: 106 additions & 1 deletion oletools/olevba.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,8 @@
from oletools.common import codepages
from oletools import ftguess
from oletools.common.log_helper import log_helper
from oletools.ppt_record_parser import PptFile, PptContainerRecord, RECORD_TYPES, \
PptRecordCString, PptRecordExOleObjAtom

# === PYTHON 2+3 SUPPORT ======================================================

Expand Down Expand Up @@ -647,6 +649,8 @@ def __init__(self, stream_path, variable, expected, value):
r'\w+_ProgressChange', r'\w+_PropertyChange', r'\w+_SetSecureLockIcon',
r'\w+_StatusTextChange', r'\w+_TitleChange', r'\w+_MouseMove', r'\w+_MouseEnter',
r'\w+_MouseLeave', r'\w+_Layout', r'\w+_OnConnecting', r'\w+_FollowHyperlink', r'\w+_ContentControlOnEnter'),
'Runs when the file is opened and Mouse Clicks or Hovers over element':
(r'MouseClick/OverInteractiveInfoContainer',),
}

# Suspicious Keywords that may be used by malware
Expand Down Expand Up @@ -903,6 +907,10 @@ def __init__(self, stream_path, variable, expected, value):
# (must be bytes for Python 3)
re_printable_string = re.compile(b'[\\t\\r\\n\\x20-\\xFF]{5,}')

# ppt record types that contain interactive content (like ActiveX)
# see ppt_record_parser.RECORD_TYPES for meaning of these type constants
PPT_INTERACTIVE_RECORD_TYPES = 0x0fc3, 0x0ff2, 0x0fd7


# === PARTIAL VBA GRAMMAR ====================================================

Expand Down Expand Up @@ -1682,6 +1690,7 @@ def __init__(self, ole, vba_root, project_path, dir_path, relaxed=True):
self.dir_stream = dir_stream

# reference: MS-VBAL 2.3.4.2 dir Stream: Version Independent Project Information
# This could be integrated with record parsing code in record_base.py

# PROJECTSYSKIND Record
# Specifies the platform for which the VBA project is created.
Expand Down Expand Up @@ -2703,6 +2712,7 @@ def __init__(self, filename, data=None, container=None, relaxed=True, encoding=D
self.vba_forms = None
self.contains_vba_macros = None # will be set to True or False by detect_vba_macros
self.contains_xlm_macros = None # will be set to True or False by detect_xlm_macros
self.contains_ppt_interactive = None # will be set to True or False by detect_ppt_interactive
self.vba_code_all_modules = None # to store the source code of all modules
# list of tuples for each module: (subfilename, stream_path, vba_filename, vba_code)
self.modules = None
Expand All @@ -2720,7 +2730,9 @@ def __init__(self, filename, data=None, container=None, relaxed=True, encoding=D
#: Encoding for VBA source code and strings returned by all methods
self.encoding = encoding
self.xlm_macros = []
self.ppt_interactive = []
self.no_xlm = False
self.no_ppt_interactive = False # nowhere set yet, but include switch for future
#: Output from pcodedmp, disassembly of the VBA P-code
self.disable_pcode = disable_pcode
self.pcodedmp_output = None
Expand Down Expand Up @@ -3259,13 +3271,20 @@ def detect_macros(self):
by calling detect_vba_macros and detect_xlm_macros.
(if the no_xlm option is set, XLM macros are not checked)

Also checks ppt files for ActiveX-like record types using self.detect_ppt_interactive
(if self.no_ppt_interactive is not set).

:return: bool, True if at least one VBA project has been found, False otherwise
"""
vba = self.detect_vba_macros()
xlm = False
found_ppt_interactive = False
if not self.no_xlm:
xlm = self.detect_xlm_macros()
return (vba or xlm)
if not self.no_ppt_interactive:
found_ppt_interactive = self.detect_ppt_interactive()

return (vba or xlm or found_ppt_interactive)

def detect_vba_macros(self):
"""
Expand Down Expand Up @@ -3293,6 +3312,7 @@ def detect_vba_macros(self):
for ole_subfile in self.ole_subfiles:
log.debug("ole subfile {}".format(ole_subfile))
ole_subfile.no_xlm = self.no_xlm
ole_subfile.no_ppt_interactive = self.no_ppt_interactive
if ole_subfile.detect_vba_macros():
self.contains_vba_macros = True
return True
Expand Down Expand Up @@ -3451,6 +3471,59 @@ def _extract_xlm_plugin_biff(self):
self.contains_xlm_macros = False
return False

def detect_ppt_interactive(self):
    """
    Search through record structure of file and find problematic record types.

    Every record of every stream is handed to `self._detect_ppt_interactive`,
    which remembers problematic records in `self.ppt_interactive`.
    The outcome is cached in `self.contains_ppt_interactive`, so a second
    call returns the stored result without parsing the file again.

    :return: bool, True if record types from PPT_INTERACTIVE_RECORD_TYPES
             were found, False otherwise (also for files that are not PPT)
    """
    # do not search again
    if self.contains_ppt_interactive is not None:
        return self.contains_ppt_interactive

    # only PPT files have the record structure that is searched here
    if self.type != TYPE_PPT:
        self.contains_ppt_interactive = False
        return False

    with PptFile(self.filename) as ppt:    # this is from ppt_record_parser
        for stream in ppt.iter_streams():
            log.debug('Parse records in ' + str(stream))
            for record in stream.iter_records():
                self._detect_ppt_interactive(record, 1, stream.name)

    # Always store a real bool. Leaving None here when nothing was found
    # would make this method return None and defeat the cache check above,
    # so the file would be parsed again on every call.
    self.contains_ppt_interactive = bool(self.ppt_interactive)
    return self.contains_ppt_interactive

def _detect_ppt_interactive(self, record, indent, stream_name):
    """
    Recursive helper for detect_ppt_interactive.

    Inspects one record and, if it is a container, all records below it.
    Findings are stored in `self.ppt_interactive` as lists of the form
    `[text, stream_name, is_duplicated]`. A finding that repeats an earlier
    one (same text, same stream) is not added again; instead the earlier
    entry gets its `is_duplicated` flag set to True.

    :param record: record to inspect
    :param int indent: nesting depth, only used to indent the debug output
    :param stream_name: name of the stream the record was read from
    :return: None
    """
    log.debug('{0}{1}'.format(' ' * indent, record))
    if record.type in PPT_INTERACTIVE_RECORD_TYPES:
        if isinstance(record, PptRecordExOleObjAtom) and record.obj_type != 2:
            return    # not ActiveX

        # collect the strings attached directly to a container record
        texts = set()
        if isinstance(record, PptContainerRecord):
            for subrec in record.get_records():
                if isinstance(subrec, PptRecordCString):
                    texts.add(subrec.get_string().strip().rstrip('/'))

        if texts:
            # sort, since set order is arbitrary and equal records must
            # produce equal text for the duplicate check below
            text = '{0}: {1}'.format(RECORD_TYPES[record.type],
                                     ', '.join(sorted(texts)))
        else:
            text = RECORD_TYPES[record.type]

        # add record, avoiding duplicates (which ppt likes to contain).
        # Compare text and stream only: an entry already flagged as
        # duplicated must still match, otherwise a third occurrence would
        # be appended as a new entry.
        for entry in self.ppt_interactive:
            if entry[0] == text and entry[1] == stream_name:
                entry[2] = True    # mark as duplicated instead of adding again
                break
        else:
            self.ppt_interactive.append([text, stream_name, False])

    if isinstance(record, PptContainerRecord):
        for subrec in record.get_records():
            self._detect_ppt_interactive(subrec, indent+1, stream_name)
    # todo: if record contains ole streams, then parse those or add to substreams

def detect_is_encrypted(self):
if self.ole_file:
self.is_encrypted = crypto.is_encrypted(self.ole_file)
Expand Down Expand Up @@ -3512,6 +3585,19 @@ def extract_macros(self):
for line in self.xlm_macros:
vba_code += "' " + line + '\n'
yield ('xlm_macro', 'xlm_macro', 'xlm_macro.txt', vba_code)
# ...and interactive components found in PPT files (copy of this bit later in function)
if self.ppt_interactive:
# group by stream
curr_stream = self.ppt_interactive[0][1]
texts = []
for text, stream_name, _ in self.ppt_interactive:
if stream_name == curr_stream:
texts.append(text)
else:
yield (self.filename, curr_stream, '', '\n'.join(texts))
texts = [text,]
curr_stream = stream_name
yield (self.filename, curr_stream, '', '\n'.join(texts))
else:
# This is an OLE file:
self.find_vba_projects()
Expand Down Expand Up @@ -3574,6 +3660,19 @@ def extract_macros(self):
for line in self.xlm_macros:
vba_code += "' " + line + '\n'
yield ('xlm_macro', 'xlm_macro', 'xlm_macro.txt', vba_code)
# probably never happens here, but just in case (code copied from above):
if self.ppt_interactive:
# group by stream
curr_stream = self.ppt_interactive[0][1]
texts = []
for text, stream_name, _ in self.ppt_interactive:
if stream_name == curr_stream:
texts.append(text)
else:
yield (self.filename, curr_stream, '', '\n'.join(texts))
texts = [text,]
curr_stream = stream_name
yield (self.filename, curr_stream, '', '\n'.join(texts))
# Analyse the VBA P-code to detect VBA stomping:
# If stomping is detected, add a fake VBA module with the P-code as source comments
# so that VBA_Scanner can find keywords and IOCs in it
Expand Down Expand Up @@ -3654,6 +3753,12 @@ def analyze_macros(self, show_decoded_strings=False, deobfuscate=False):
description = 'XLM macro found. It may contain malicious code'
scanner.suspicious_keywords.append((keyword, description))
scanner.results.append(('Suspicious', keyword, description))
if self.contains_ppt_interactive:
log.debug('adding PPT interactive found to suspicious keywords')
keyword = 'Interactive Controls'
description = 'Found interactive controls. May execute malicious code'
scanner.suspicious_keywords.append((keyword, description))
scanner.results.append(('Suspicious', keyword, description))
# TODO: this has been temporarily disabled
if self.template_injection_found:
log.debug('adding Template Injection to suspicious keywords')
Expand Down
Loading