Skip to content

Commit

Permalink
Lots of changes:
Browse files Browse the repository at this point in the history
- Refactored the parser api.
- Split and refactored the utils module.
- Introduced an api to store blobs in the relational db schema.
- Finished the client-side artifact collector flow.
- Bug fixes for the client when non-HTTP proxies are configured.
- More work on Python3 compatibility.
- Introducing an API to decode files while downloading from the UI.
  • Loading branch information
grrrrrrrrr committed Oct 9, 2018
1 parent 27ba38d commit 07155aa
Show file tree
Hide file tree
Showing 228 changed files with 4,957 additions and 3,074 deletions.
27 changes: 11 additions & 16 deletions docker/Dockerfile.build_centos_i686
Expand Up @@ -2,23 +2,18 @@ FROM centos_i686:7_base

LABEL maintainer="grr-dev@googlegroups.com"

RUN linux32 yum update
# Install Python from source.
RUN cd /tmp && curl -O -L http://python.org/ftp/python/2.7.14/Python-2.7.14.tar.xz && \
tar xf Python-2.7.14.tar.xz && \
cd Python-2.7.14 && \
linux32 ./configure --prefix=/usr/local --enable-unicode=ucs4 --enable-shared LDFLAGS="-Wl,-rpath /usr/local/lib" && \
linux32 make && \
linux32 make install

RUN linux32 yum install -y \
python-devel \
java-1.8.0-openjdk \
libffi-devel \
openssl-devel \
git \
gcc \
gcc-c++ \
redhat-rpm-config \
rpm-build \
rpm-sign

# Install pip
RUN linux32 curl https://bootstrap.pypa.io/get-pip.py | python
# Install pip.
RUN linux32 curl https://bootstrap.pypa.io/get-pip.py | /usr/local/bin/python

# Install virtualenv.
RUN linux32 pip install --upgrade pip virtualenv

CMD ["/bin/bash"]
CMD ["/bin/bash"]
140 changes: 60 additions & 80 deletions grr/client/grr_response_client/client_actions/artifact_collector.py
Expand Up @@ -13,7 +13,7 @@
from grr_response_client.client_actions import operating_system
from grr_response_client.client_actions import standard
from grr_response_core.lib import artifact_utils
from grr_response_core.lib import parser as parser_lib
from grr_response_core.lib import parsers
from grr_response_core.lib import rdfvalue
from grr_response_core.lib import utils
# The client artifact collector parses the responses on the client. So the
Expand All @@ -29,13 +29,7 @@
from grr_response_core.lib.rdfvalues import file_finder as rdf_file_finder
from grr_response_core.lib.rdfvalues import paths as rdf_paths
from grr_response_core.lib.rdfvalues import protodict as rdf_protodict


def _NotImplemented(args):
# TODO(user): Not implemented yet. This method can be deleted once the
# missing source types are supported.
del args # Unused
raise NotImplementedError()
from grr_response_core.lib.util import precondition


class ArtifactCollector(actions.ActionPlugin):
Expand Down Expand Up @@ -68,11 +62,13 @@ def _CollectArtifact(self, artifact, apply_parsers):
"""Returns an `CollectedArtifact` rdf object for the requested artifact."""
artifact_result = rdf_artifacts.CollectedArtifact(name=artifact.name)

parsers = []
if apply_parsers:
parsers = parser_lib.Parser.GetClassesByArtifact(artifact.name)
parser_factory = parsers.ArtifactParserFactory(unicode(artifact.name))
else:
parser_factory = None

for source_result_list in self._ProcessSources(artifact.sources, parsers):
for source_result_list in self._ProcessSources(artifact.sources,
parser_factory):
for response in source_result_list:
action_result = rdf_artifacts.ClientActionResult()
action_result.type = response.__class__.__name__
Expand Down Expand Up @@ -111,52 +107,45 @@ def SetKnowledgeBaseValue(self, attribute, value):
if value:
self.knowledge_base.Set(attribute, value)

def _ProcessSources(self, sources, parsers):
def _ProcessSources(self, sources, parser_factory):
"""Iterates through sources yielding action responses."""
for source in sources:
for action, request in self._ParseSourceType(source):
yield self._RunClientAction(action, request, parsers, source.path_type)
yield self._RunClientAction(action, request, parser_factory,
source.path_type)

def _RunClientAction(self, action, request, parsers, path_type):
def _RunClientAction(self, action, request, parser_factory, path_type):
"""Runs the client action with the request and parses the result."""

responses = list(action(request))

if not parsers:
if parser_factory is None:
return responses

# filter parsers by process_together setting
multi_parsers = []
single_parsers = []
for parser in parsers:
# TODO(hanuszczak): This is absolutely disgusting and should be refactored
# with some kind of factory, that produces different parser instances
# depending whether we are on the server or on the client. Doing so would
# probably solve issues with this very artificial metclass registry that
# we have here.
if issubclass(parser, parser_lib.FileParser):
parser_obj = parser(vfs.VFSOpen)
else:
parser_obj = parser()

if parser_obj.process_together:
multi_parsers.append(parser_obj)
else:
single_parsers.append(parser_obj)

# parse the responses
parsed_responses = []

for response in responses:
for parser in single_parsers:
utils.AssertType(parser, parser_lib.SingleResponseParser)
for parser in parser_factory.SingleResponseParsers():
parsed_responses.extend(
parser.ParseResponse(self.knowledge_base, response, path_type))

for parser in multi_parsers:
for res in ParseMultipleResponses(parser, responses, self.knowledge_base,
path_type):
parsed_responses.append(res)
for parser in parser_factory.SingleFileParsers():
precondition.AssertType(response, rdf_client_fs.StatEntry)
pathspec = response.pathspec
with vfs.VFSOpen(pathspec) as filedesc:
parsed_responses.extend(
parser.ParseFile(self.knowledge_base, pathspec, filedesc))

for parser in parser_factory.MultiResponseParsers():
parsed_responses.extend(
parser.ParseResponses(self.knowledge_base, responses))

for parser in parser_factory.MultiFileParsers():
precondition.AssertIterableType(responses, rdf_client_fs.StatEntry)
pathspecs = [response.pathspec for response in responses]
with vfs.VFSMultiOpen(pathspecs) as filedescs:
parsed_responses.extend(
parser.ParseFiles(self.knowledge_base, pathspecs, filedescs))

return parsed_responses

Expand All @@ -168,7 +157,7 @@ def _ParseSourceType(self, source):
type_name.DIRECTORY: self._ProcessFileSource,
type_name.FILE: self._ProcessFileSource,
type_name.GREP: self._ProcessGrepSource,
type_name.REGISTRY_KEY: _NotImplemented,
type_name.REGISTRY_KEY: self._ProcessRegistryKeySource,
type_name.REGISTRY_VALUE: self._ProcessRegistryValueSource,
type_name.WMI: self._ProcessWmiSource,
type_name.ARTIFACT_FILES: self._ProcessArtifactFilesSource,
Expand All @@ -184,6 +173,34 @@ def _ParseSourceType(self, source):
for res in source_type_action(source):
yield res

def _ProcessRegistryKeySource(self, source):
"""Glob for paths in the registry."""
keys = source.base_source.attributes.get("keys", [])
if not keys:
return

interpolated_paths = artifact_utils.InterpolateListKbAttributes(
input_list=keys,
knowledge_base=self.knowledge_base,
ignore_errors=self.ignore_interpolation_errors)

glob_expressions = map(rdf_paths.GlobExpression, interpolated_paths)

patterns = []
for pattern in glob_expressions:
patterns.extend(pattern.Interpolate(knowledge_base=self.knowledge_base))
patterns.sort(key=len, reverse=True)

file_finder_action = rdf_file_finder.FileFinderAction.Stat()
request = rdf_file_finder.FileFinderArgs(
paths=patterns,
action=file_finder_action,
follow_links=True,
pathtype=rdf_paths.PathSpec.PathType.REGISTRY)
action = file_finder.RegistryKeyFromClient

yield action, request

def _ProcessGrepSource(self, source):
"""Find files fulfilling regex conditions."""
attributes = source.base_source.attributes
Expand Down Expand Up @@ -220,7 +237,7 @@ def _ProcessArtifactFilesSource(self, source):
pathspec_attribute = source.base_source.attributes.get("pathspec_attribute")

for source_result_list in self._ProcessSources(
source.artifact_sources, parsers=[]):
source.artifact_sources, parser_factory=None):
for response in source_result_list:
path = _ExtractPath(response, pathspec_attribute)
if path is not None:
Expand Down Expand Up @@ -344,43 +361,6 @@ def _ProcessRegistryValueSource(self, source):
yield action, request


# TODO(hanuszczak): Apply the same treatment as for single response parsing.
def ParseMultipleResponses(parser_obj, responses, knowledge_base, path_type):
  """Runs a parser's `ParseMultiple` over all responses of a client action.

  The arguments passed to `ParseMultiple` depend on the kind of parser: file
  parsers receive stat entries and opened files, registry parsers receive the
  raw responses, and artifact files parsers additionally receive the path
  type.

  Args:
    parser_obj: An instance of the parser.
    responses: A list of rdf value responses from a client action.
    knowledge_base: containing information about the client.
    path_type: Specifying whether OS or TSK paths are used.

  Returns:
    An iterable of rdf value responses.

  Raises:
    ValueError: If the requested parser is not supported.
  """
  parse_multiple = parser_obj.ParseMultiple

  if isinstance(parser_obj, parser_lib.FileParser):
    opened_files = []
    stat_entries = []
    for response in responses:
      try:
        opened_files.append(vfs.VFSOpen(response.pathspec))
        stat_entries.append(
            rdf_client_fs.StatEntry(pathspec=response.pathspec))
      except IOError:
        # Responses that raise IOError here are skipped.
        continue
    # NOTE(review): the files opened above are not closed in this function.
    return parse_multiple(stat_entries, opened_files, knowledge_base)

  if isinstance(parser_obj,
                (parser_lib.RegistryParser, parser_lib.RegistryValueParser)):
    return parse_multiple(responses, knowledge_base)

  if isinstance(parser_obj, parser_lib.ArtifactFilesParser):
    return parse_multiple(responses, knowledge_base, path_type)

  raise ValueError("Unsupported parser: %s" % parser_obj)


def _ExtractPath(response, pathspec_attribute=None):
"""Returns the path from a client action response as a string.
Expand Down
Expand Up @@ -13,8 +13,10 @@

from grr_response_client.client_actions import artifact_collector
from grr_response_core import config
from grr_response_core.lib import factory
from grr_response_core.lib import flags
from grr_response_core.lib import parser
from grr_response_core.lib import parsers
from grr_response_core.lib import utils
from grr_response_core.lib.rdfvalues import anomaly as rdf_anomaly
from grr_response_core.lib.rdfvalues import artifacts as rdf_artifact
Expand Down Expand Up @@ -207,6 +209,44 @@ def testRegistryValueArtifact(self):
urn = file_stat.pathspec.AFF4Path(self.SetupClient(0))
self.assertTrue(str(urn).endswith("BootExecute"))

def testRegistryKeyArtifact(self):
  """Test the basic Registry Key collection.

  Collects a single globbed registry key and checks that exactly one action
  result comes back and that its value is a `StatEntry`.
  """

  source = rdf_artifact.ArtifactSource(
      type=self.source_type.REGISTRY_KEY,
      attributes={
          "keys": [
              r"HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet"
              r"\Control\Session Manager\*"
          ],
      })
  request = GetRequest(source, "TestRegistryKey")

  # Run the collector with the REGISTRY and OS VFS handlers overridden by the
  # test fakes.
  with vfs_test_lib.VFSOverrider(rdf_paths.PathSpec.PathType.REGISTRY,
                                 vfs_test_lib.FakeRegistryVFSHandler):
    with vfs_test_lib.VFSOverrider(rdf_paths.PathSpec.PathType.OS,
                                   vfs_test_lib.FakeFullVFSHandler):
      collected_artifact = self.RunArtifactCollector(request)
      self.assertEqual(len(collected_artifact.action_results), 1)
      file_stat = collected_artifact.action_results[0].value
      # assertIsInstance reports the actual type when the check fails.
      self.assertIsInstance(file_stat, rdf_client_fs.StatEntry)

def testRegistryNoKeysArtifact(self):
  """Test that a Registry Key source without keys yields no results."""

  source = rdf_artifact.ArtifactSource(
      type=self.source_type.REGISTRY_KEY, attributes={
          "keys": [],
      })
  request = GetRequest(source, "TestRegistryKey")

  # Run the collector with the REGISTRY and OS VFS handlers overridden by the
  # test fakes.
  with vfs_test_lib.VFSOverrider(rdf_paths.PathSpec.PathType.REGISTRY,
                                 vfs_test_lib.FakeRegistryVFSHandler):
    with vfs_test_lib.VFSOverrider(rdf_paths.PathSpec.PathType.OS,
                                   vfs_test_lib.FakeFullVFSHandler):
      collected_artifact = self.RunArtifactCollector(request)
      self.assertEqual(len(collected_artifact.action_results), 0)

def testDirectoryArtifact(self):
"""Test the source type `DIRECTORY`."""

Expand Down Expand Up @@ -520,11 +560,10 @@ def Parse(self, stat, file_obj, knowledge_base):
yield rdf_protodict.AttributedDict(**cfg)


class FakeFileParserProcessTogether(parser.FileParser):
class FakeFileMultiParser(parser.FileMultiParser):

output_types = ["AttributedDict"]
supported_artifacts = ["FakeFileArtifact2"]
process_together = True

def ParseMultiple(self, stats, file_objects, knowledge_base):

Expand All @@ -545,8 +584,12 @@ def ParseMultiple(self, stats, file_objects, knowledge_base):

class ParseResponsesTest(client_test_lib.EmptyActionTest):

@mock.patch.object(parsers, "SINGLE_RESPONSE_PARSER_FACTORY",
factory.Factory(parser.SingleResponseParser))
def testCmdArtifactAction(self):
"""Test the actual client action with parsers."""
parsers.SINGLE_RESPONSE_PARSER_FACTORY.Register("Cmd", TestEchoCmdParser)

client_test_lib.Command("/bin/echo", args=["1"])

source = rdf_artifact.ArtifactSource(
Expand All @@ -570,8 +613,11 @@ def testCmdArtifactAction(self):
self.assertIsInstance(res, rdf_client.SoftwarePackage)
self.assertEqual(res.description, "1\n")

@mock.patch.object(parsers, "SINGLE_FILE_PARSER_FACTORY",
factory.Factory(parser.SingleFileParser))
def testFakeFileArtifactAction(self):
"""Test collecting a file artifact and parsing the response."""
parsers.SINGLE_FILE_PARSER_FACTORY.Register("Fake", FakeFileParser)

file_path = os.path.join(self.base_path, "numbers.txt")
source = rdf_artifact.ArtifactSource(
Expand All @@ -593,8 +639,11 @@ def testFakeFileArtifactAction(self):
self.assertEqual(len(res.users), 1000)
self.assertEqual(res.filename, file_path)

@mock.patch.object(parsers, "MULTI_FILE_PARSER_FACTORY",
factory.Factory(parser.MultiFileParser))
def testFakeFileArtifactActionProcessTogether(self):
"""Test collecting a file artifact and parsing the responses together."""
parsers.MULTI_FILE_PARSER_FACTORY.Register("Fake", FakeFileMultiParser)

file_path = os.path.join(self.base_path, "numbers.txt")
source = rdf_artifact.ArtifactSource(
Expand Down

0 comments on commit 07155aa

Please sign in to comment.