Skip to content

Commit

Permalink
Added preferred language extraction option log2timeline#636 (log2time…
Browse files Browse the repository at this point in the history
  • Loading branch information
joachimmetz committed Oct 16, 2021
1 parent 083fb79 commit c1998df
Show file tree
Hide file tree
Showing 14 changed files with 178 additions and 115 deletions.
125 changes: 86 additions & 39 deletions plaso/cli/extraction_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from plaso.parsers import manager as parsers_manager
from plaso.parsers import presets as parsers_presets
from plaso.storage import factory as storage_factory
from plaso.winnt import language_ids


class ExtractionTool(
Expand All @@ -44,6 +45,8 @@ class ExtractionTool(
"""Extraction CLI tool.
Attributes:
list_language_identifiers (bool): True if information about the language
identifiers should be shown.
list_time_zones (bool): True if the time zones should be listed.
"""

Expand Down Expand Up @@ -77,6 +80,8 @@ def __init__(self, input_reader=None, output_writer=None):
self._expanded_parser_filter_expression = None
self._number_of_extraction_workers = 0
self._parser_filter_expression = None
self._preferred_codepage = None
self._preferred_language = None
self._preferred_time_zone = None
self._preferred_year = None
self._presets_file = None
Expand All @@ -98,6 +103,7 @@ def __init__(self, input_reader=None, output_writer=None):
self._worker_timeout = None
self._yara_rules_string = None

self.list_language_identifiers = False
self.list_time_zones = False

def _CreateProcessingConfiguration(self, knowledge_base):
Expand Down Expand Up @@ -265,6 +271,36 @@ def _IsArchiveFile(self, path_spec):

return bool(type_indicators)

def _ParseExtractionOptions(self, options):
"""Parses the extraction options.

Parses the preferred language via the 'language' argument helper and
checks the 'timezone' option against pytz. The value "list" turns on
list_language_identifiers or list_time_zones respectively.

Args:
options (argparse.Namespace): command line arguments.

Raises:
BadConfigOption: if the options are invalid, for example when the time
zone is not known to pytz.
"""
# Let the 'language' argument helper parse its option onto this tool.
# NOTE(review): presumably this is what sets self._preferred_language, which
# is read below -- confirm against the helper implementation.
helpers_manager.ArgumentHelperManager.ParseOptions(
options, self, names=['language'])

# TODO: add preferred encoding

# The comparison with 'list' is case-sensitive here, unlike the time zone
# check below, which lower-cases the value first.
self.list_language_identifiers = self._preferred_language == 'list'

time_zone_string = self.ParseStringOption(options, 'timezone')
if isinstance(time_zone_string, str):
if time_zone_string.lower() == 'list':
self.list_time_zones = True

elif time_zone_string:
try:
# Called for validation only; the returned time zone object is discarded.
pytz.timezone(time_zone_string)
except pytz.UnknownTimeZoneError:
raise errors.BadConfigOption(
'Unknown time zone: {0:s}'.format(time_zone_string))

# NOTE(review): indentation was lost in this view, so it cannot be told here
# whether this assignment is limited to the validated (elif) case -- confirm
# against the actual file.
self._preferred_time_zone = time_zone_string

def _ParsePerformanceOptions(self, options):
"""Parses the performance options.
Expand Down Expand Up @@ -334,29 +370,6 @@ def _ParseProcessingOptions(self, options):
dfvfs_definitions.PREFERRED_GPT_BACK_END = (
dfvfs_definitions.TYPE_INDICATOR_GPT)

def _ParseTimeZoneOption(self, options):
"""Parses the time zone options.

Reads the 'timezone' option. The value "list" (compared case-insensitively)
turns on list_time_zones; any other non-empty string is checked against
pytz.

Args:
options (argparse.Namespace): command line arguments.

Raises:
BadConfigOption: if the options are invalid, for example when the time
zone is not known to pytz.
"""
time_zone_string = self.ParseStringOption(options, 'timezone')
if isinstance(time_zone_string, str):
if time_zone_string.lower() == 'list':
self.list_time_zones = True

elif time_zone_string:
try:
# Called for validation only; the returned time zone object is discarded.
pytz.timezone(time_zone_string)
except pytz.UnknownTimeZoneError:
raise errors.BadConfigOption(
'Unknown time zone: {0:s}'.format(time_zone_string))

# NOTE(review): indentation was lost in this view, so it cannot be told here
# whether this assignment is limited to the validated (elif) case -- confirm
# against the actual file.
self._preferred_time_zone = time_zone_string

def _PreprocessSources(self, extraction_engine, session, storage_writer):
"""Preprocesses the sources.
Expand Down Expand Up @@ -424,6 +437,7 @@ def _ProcessSources(self, session, storage_writer):
configuration.parser_filter_expression.split(','))
session.parser_filter_expression = self._parser_filter_expression

self._SetExtractionPreferredLanguage(extraction_engine.knowledge_base)
self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

# TODO: set mount path in knowledge base with
Expand Down Expand Up @@ -496,6 +510,23 @@ def _ReadParserPresetsFromFile(self):
'Unable to read parser presets from file with error: {0!s}'.format(
exception))

def _SetExtractionPreferredLanguage(self, knowledge_base):
    """Applies the preferred language to the knowledge base before extraction.

    Nothing is done when no preferred language was set. When the knowledge
    base rejects the language with a ValueError a warning is logged that
    names knowledge_base.language as the fallback.

    Args:
        knowledge_base (KnowledgeBase): contains information from the source
            data needed for parsing.
    """
    # Note session.preferred_language will default to en-US but
    # self._preferred_language is None when not set.
    if not self._preferred_language:
        return

    try:
        knowledge_base.SetLanguage(self._preferred_language)
    except ValueError:
        warning_message = (
            'Unsupported language: {0:s}, defaulting to {1:s}'.format(
                self._preferred_language, knowledge_base.language))
        logger.warning(warning_message)

def _SetExtractionPreferredTimeZone(self, knowledge_base):
"""Sets the preferred time zone before extraction.
Expand All @@ -514,6 +545,27 @@ def _SetExtractionPreferredTimeZone(self, knowledge_base):
'Unsupported time zone: {0:s}, defaulting to {1:s}'.format(
self._preferred_time_zone, knowledge_base._time_zone.zone))

def AddExtractionOptions(self, argument_group):
    """Registers the extraction options on the argument group.

    Adds the options of the 'language' argument helper followed by the
    -z/--zone/--timezone option.

    Args:
        argument_group (argparse._ArgumentGroup): argparse argument group.
    """
    time_zone_help = (
        'preferred time zone of extracted date and time values that are '
        'stored without a time zone indicator. The time zone is determined '
        'based on the source data where possible otherwise it will default '
        'to UTC. Use "list" to see a list of available time zones.')

    helpers_manager.ArgumentHelperManager.AddCommandLineArguments(
        argument_group, names=['language'])

    # Note defaults here are None so we can determine if an option was set.

    # TODO: add preferred encoding

    argument_group.add_argument(
        '-z', '--zone', '--timezone', dest='timezone', action='store',
        metavar='TIME_ZONE', type=str, default=None, help=time_zone_help)

def AddPerformanceOptions(self, argument_group):
"""Adds the performance options to the argument group.
Expand Down Expand Up @@ -549,22 +601,6 @@ def AddProcessingOptions(self, argument_group):
helpers_manager.ArgumentHelperManager.AddCommandLineArguments(
argument_group, names=argument_helper_names)

def AddTimeZoneOption(self, argument_group):
    """Registers the -z/--zone/--timezone option on the argument group.

    Args:
        argument_group (argparse._ArgumentGroup): argparse argument group.
    """
    time_zone_help = (
        'preferred time zone of extracted date and time values that are '
        'stored without a time zone indicator. The time zone is determined '
        'based on the source data where possible otherwise it will default '
        'to UTC. Use "list" to see a list of available time zones.')

    # Note the default here is None so we can determine if the time zone
    # option was set.
    argument_group.add_argument(
        '-z', '--zone', '--timezone', dest='timezone', action='store',
        metavar='TIME_ZONE', type=str, default=None, help=time_zone_help)

def ExtractEventsFromSources(self):
"""Processes the sources and extracts events.
Expand Down Expand Up @@ -601,6 +637,7 @@ def ExtractEventsFromSources(self):
debug_mode=self._debug_mode,
filter_file_path=self._filter_file,
preferred_encoding=self.preferred_encoding,
preferred_language=self._preferred_language,
preferred_time_zone=self._preferred_time_zone,
preferred_year=self._preferred_year,
text_prepend=self._text_prepend)
Expand Down Expand Up @@ -638,6 +675,16 @@ def ExtractEventsFromSources(self):

self._status_view.PrintExtractionSummary(processing_status)

def ListLanguageIdentifiers(self):
    """Writes a table of the language identifiers to the output writer."""
    table_view = views.ViewsFactory.GetTableView(
        self._views_format_type, column_names=['Identifier', 'Language'],
        title='Language identifiers')

    sorted_entries = sorted(language_ids.LANGUAGE_IDENTIFIERS.items())
    for identifier, details in sorted_entries:
        # The element at index 1 is shown in the "Language" column.
        table_view.AddRow([identifier, details[1]])

    table_view.Write(self._output_writer)

def ListParsersAndPlugins(self):
"""Lists information about the available parsers and plugins."""
parsers_information = parsers_manager.ParsersManager.GetParsersInformation()
Expand Down
10 changes: 5 additions & 5 deletions plaso/cli/log2timeline_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def ParseArguments(self, arguments):
extraction_group, names=argument_helper_names)

self.AddStorageMediaImageOptions(extraction_group)
self.AddTimeZoneOption(extraction_group)
self.AddExtractionOptions(extraction_group)
self.AddVSSProcessingOptions(extraction_group)
self.AddCredentialOptions(extraction_group)

Expand Down Expand Up @@ -291,7 +291,7 @@ def ParseOptions(self, options):
helpers_manager.ArgumentHelperManager.ParseOptions(
options, self, names=argument_helper_names)

self._ParseTimeZoneOption(options)
self._ParseExtractionOptions(options)

self.list_hashers = self._hasher_names_string == 'list'
self.list_parsers_and_plugins = self._parser_filter_expression == 'list'
Expand All @@ -305,9 +305,9 @@ def ParseOptions(self, options):

self.dependencies_check = getattr(options, 'dependencies_check', True)

if (self.list_hashers or self.list_parsers_and_plugins or
self.list_profilers or self.list_time_zones or self.show_info or
self.show_troubleshooting):
if (self.list_hashers or self.list_language_identifiers or
self.list_parsers_and_plugins or self.list_profilers or
self.list_time_zones or self.show_info or self.show_troubleshooting):
return

self._ParseInformationalOptions(options)
Expand Down
11 changes: 11 additions & 0 deletions plaso/cli/psort_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from plaso.lib import loggers
from plaso.multi_process import output_engine as multi_output_engine
from plaso.storage import factory as storage_factory
from plaso.winnt import language_ids


class PsortTool(
Expand Down Expand Up @@ -237,6 +238,16 @@ def AddProcessingOptions(self, argument_group):
'15.0 minutes. If a worker process exceeds this timeout it is '
'killed by the main (foreman) process.'))

def ListLanguageIdentifiers(self):
    """Writes a table of the language identifiers to the output writer."""
    table_view = views.ViewsFactory.GetTableView(
        self._views_format_type, column_names=['Identifier', 'Language'],
        title='Language identifiers')

    sorted_entries = sorted(language_ids.LANGUAGE_IDENTIFIERS.items())
    for identifier, details in sorted_entries:
        # The element at index 1 is shown in the "Language" column.
        table_view.AddRow([identifier, details[1]])

    table_view.Write(self._output_writer)

def ParseArguments(self, arguments):
"""Parses the command line arguments.
Expand Down
15 changes: 3 additions & 12 deletions plaso/cli/psteal_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,6 @@ class PstealTool(
dependencies_check (bool): True if the availability and versions of
dependencies should be checked.
list_hashers (bool): True if the hashers should be listed.
list_language_identifiers (bool): True if information about the language
identifiers should be shown.
list_output_modules (bool): True if information about the output modules
should be shown.
list_parsers_and_plugins (bool): True if the parsers and plugins should
Expand Down Expand Up @@ -96,14 +94,12 @@ def __init__(self, input_reader=None, output_writer=None):
self._number_of_analysis_reports = 0
self._output_format = None
self._parsers_manager = parsers_manager.ParsersManager
self._preferred_language = 'en-US'
self._preferred_year = None
self._time_slice = None
self._use_time_slicer = False

self.dependencies_check = True
self.list_hashers = False
self.list_language_identifiers = False
self.list_output_modules = False
self.list_parsers_and_plugins = False

Expand Down Expand Up @@ -269,7 +265,7 @@ def ParseArguments(self, arguments):

self.AddStorageOptions(extraction_group)
self.AddStorageMediaImageOptions(extraction_group)
self.AddTimeZoneOption(extraction_group)
self.AddExtractionOptions(extraction_group)
self.AddVSSProcessingOptions(extraction_group)
self.AddCredentialOptions(extraction_group)

Expand All @@ -292,9 +288,6 @@ def ParseArguments(self, arguments):

output_group = argument_parser.add_argument_group('output arguments')

helpers_manager.ArgumentHelperManager.AddCommandLineArguments(
output_group, names=['language'])

self.AddOutputOptions(output_group)

output_format_group = argument_parser.add_argument_group(
Expand Down Expand Up @@ -360,15 +353,13 @@ def ParseOptions(self, options):
# and output_time_zone options.
self._ParseOutputOptions(options)

argument_helper_names = [
'artifact_definitions', 'hashers', 'language', 'parsers']
argument_helper_names = ['artifact_definitions', 'hashers', 'parsers']
helpers_manager.ArgumentHelperManager.ParseOptions(
options, self, names=argument_helper_names)

self._ParseTimeZoneOption(options)
self._ParseExtractionOptions(options)

self.list_hashers = self._hasher_names_string == 'list'
self.list_language_identifiers = self._preferred_language == 'list'
self.list_parsers_and_plugins = self._parser_filter_expression == 'list'

self.show_troubleshooting = getattr(options, 'show_troubleshooting', False)
Expand Down
11 changes: 0 additions & 11 deletions plaso/cli/tool_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from plaso.lib import errors
from plaso.output import manager as output_manager
from plaso.output import mediator as output_mediator
from plaso.winnt import language_ids


# TODO: pass argument_parser instead of argument_group and add groups
Expand Down Expand Up @@ -314,16 +313,6 @@ def AddOutputOptions(self, argument_group):
'this are: dynamic and l2t_csv. Use "list" to see a list of '
'available time zones.'))

def ListLanguageIdentifiers(self):
    """Writes a table of the language identifiers to the output writer."""
    table_view = views.ViewsFactory.GetTableView(
        self._views_format_type, column_names=['Identifier', 'Language'],
        title='Language identifiers')

    sorted_entries = sorted(language_ids.LANGUAGE_IDENTIFIERS.items())
    for identifier, details in sorted_entries:
        # The element at index 1 is shown in the "Language" column.
        table_view.AddRow([identifier, details[1]])

    table_view.Write(self._output_writer)

def ListOutputModules(self):
"""Lists the output modules."""
table_view = views.ViewsFactory.GetTableView(
Expand Down

0 comments on commit c1998df

Please sign in to comment.