Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make fedmsg replays from datagrepper work in most cases #477

Merged
merged 2 commits into from Sep 6, 2017
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 1 addition & 7 deletions fedmsg/consumers/__init__.py
Expand Up @@ -183,16 +183,10 @@ def _backlog(self, data):

retrieved = 0
for message in self.get_datagrepper_results(then, now):

# Take the messages from datagrepper and remove any keys that were
# artificially added to the message. The presence of these would
# otherwise cause message crypto validation to fail.
for artificial_key in ('source_name', 'source_version'):
if artificial_key in message:
del message[artificial_key]

# Also, we expect the timestamp to be an 'int'
message['timestamp'] = int(message['timestamp'])
message = fedmsg.crypto.utils.fix_datagrepper_message(message)

if message['msg_id'] != last['msg_id']:
retrieved = retrieved + 1
Expand Down
46 changes: 46 additions & 0 deletions fedmsg/crypto/utils.py
Expand Up @@ -7,6 +7,52 @@
_log = logging.getLogger(__name__)


def fix_datagrepper_message(message):
"""
See if a message is (probably) a datagrepper message and attempt to mutate
it to pass signature validation.

Datagrepper adds the 'source_name' and 'source_version' keys. If messages happen
to use those keys, they will fail message validation. Additionally, a 'headers'
dictionary is present on all responses, regardless of whether it was in the
original message or not. This is deleted if it's null, which won't be correct in
all cases. Finally, datagrepper turns the 'timestamp' field into a float, but it
might have been an integer when the message was signed.

A copy of the dictionary is made and returned if altering the message is necessary.

I'm so sorry.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lol. This does all sound pretty crazy!


Args:
message (dict): A message to clean up.

Returns:
dict: A copy of the provided message, with the datagrepper-related keys removed
if they were present.
"""
if not ('source_name' in message and 'source_version' in message):

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps this ought to be an or instead of and?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

datanommer always adds both, according to the code. My fear was that if a message uses either one of the keys (they're both pretty generic), we'd mess with it. We'll still do the wrong thing if they use both, but this reduces the likelihood.

It's not a great solution. It's not even a good solution, but I don't know what else to do.

return message

# Don't mutate the original message
message = message.copy()

del message['source_name']
del message['source_version']
# datanommer adds the headers field to the message in all cases.
# This is a huge problem because if the signature was generated with a 'headers'
# key set and we delete it here, messages will fail validation, but if we don't
# messages will fail validation if they didn't have a 'headers' key set.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So here's another crazy idea: if headers is null and validation fals, should we try adding a null headers back and see if that version passes validation?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could, and I considered that. The reason I opted not to is because this is already way more complicated than it should be and even if we did that we'd still be wrong plenty of times. What happens when someone sets that to null rather than an empty object? What happens when messages get signed with floating point timestamps rather than ints? datanommer truncates those. What happens when datanommer adds a new field and forgets to adjust this?

I'd rather have datanommer make strong promises about not altering messages.

#
# There's no way to know whether or not the headers field was part of the signed
# message or not. Generally, the problem is datanommer is mutating messages.
if 'headers' in message and not message['headers']:
del message['headers']
if 'timestamp' in message:
message['timestamp'] = int(message['timestamp'])

return message


def validate_policy(topic, signer, routing_policy, nitpicky=False):
"""
Checks that the sender is allowed to emit messages for the given topic.
Expand Down
34 changes: 34 additions & 0 deletions fedmsg/tests/consumers/test_consumers.py
Expand Up @@ -20,6 +20,7 @@
# Authors: Jeremy Cline <jcline@redhat.com>
"""Tests for the :mod:`fedmsg.consumers` module."""

import json
import os
import unittest

Expand All @@ -31,12 +32,45 @@
from fedmsg.tests.crypto.test_x509 import SSLDIR


FIXTURES_DIR = os.path.abspath(
os.path.join(os.path.dirname(__file__), '../fixtures/'))


class DummyConsumer(FedmsgConsumer):
"""Set attributes necessary to instantiate a consumer."""
config_key = 'dummy'
validate_signatures = True


class FedmsgConsumerReplayTests(unittest.TestCase):
"""Tests for the replay functionality of fedmsg consumers method."""

def setUp(self):
self.config = {
'dummy': True,
'ssldir': SSLDIR,
'ca_cert_cache': os.path.join(SSLDIR, 'fedora_ca.crt'),
'ca_cert_cache_expiry': 1497618475, # Stop fedmsg overwriting my CA, See Issue 420
'crypto_validate_backends': ['x509'],
}
self.hub = mock.Mock(config=self.config)
self.consumer = DummyConsumer(self.hub)

def test_backlog_message_validation(self):

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It'd be good to add a docblock here.

with open(os.path.join(FIXTURES_DIR, 'sample_datanommer_response.json')) as fd:
replay_messages = json.load(fd)
self.consumer.get_datagrepper_results = mock.Mock(
return_value=replay_messages['raw_messages'])
last_message = json.dumps({'message': {'body': {'msg_id': 'myid', 'timestamp': 0}}})

# This places all the messages from a call to "get_datagrepper_results" in the
# "incoming" queue.Queue
self.consumer._backlog(last_message)

while not self.consumer.incoming.empty():
self.consumer.validate(self.consumer.incoming.get())


class FedmsgConsumerValidateTests(unittest.TestCase):
"""Tests for the :meth:`FedmsgConsumer.validate` method."""

Expand Down
68 changes: 68 additions & 0 deletions fedmsg/tests/crypto/test_utils.py
Expand Up @@ -7,6 +7,74 @@
from fedmsg.crypto import utils


class FixDatanommerMessageTests(unittest.TestCase):
"""Tests for the :func:`fedmsg.crypto.utils.fix_datagrepper_message` function."""

def test_no_source_keys(self):
"""Assert messages with neither "source_name" or "source_version" are untouched."""
original_message = {'my': 'message'}

self.assertTrue(original_message is utils.fix_datagrepper_message(original_message))

def test_no_source_name(self):
"""Assert messages missing the "source_version" key are untouched."""

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps this comment was meant to say "source_name" instead of "source_version"?

original_message = {'source_version': '0.1.0'}

self.assertTrue(original_message is utils.fix_datagrepper_message(original_message))

def test_no_source_version(self):
"""Assert messages missing the "source_name" key are untouched."""

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar here.

original_message = {'source_name': 'datanommer'}

self.assertTrue(original_message is utils.fix_datagrepper_message(original_message))

def test_no_timestamp(self):
"""Assert messages missing the "source_name" key are untouched."""

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comment is copypasta'd.

original_message = {
'source_name': 'datanommer',
'source_version': '1',
}

self.assertEqual({}, utils.fix_datagrepper_message(original_message))

def test_float_timestamp(self):
"""Assert the "timestamp" key is converted to an int from a float."""
original_message = {
'source_name': 'datanommer',
'source_version': '1',
'timestamp': 1.0,
}

self.assertEqual({'timestamp': 1}, utils.fix_datagrepper_message(original_message))

def test_empty_headers(self):
"""Assert the "headers" key is removed if it is empty."""
original_message = {
'source_name': 'datanommer',
'source_version': '1',
'headers': {},
}

self.assertEqual({}, utils.fix_datagrepper_message(original_message))

def test_headers(self):
"""Assert the "headers" key is untouched if it has a value."""
original_message = {
'source_name': 'datanommer',
'source_version': '1',
'headers': {'k': 'v'},
}

self.assertEqual({'headers': {'k': 'v'}}, utils.fix_datagrepper_message(original_message))

def test_message_copied(self):
"""Assert messages are copied if they are altered."""
original_message = {'source_name': 'datanommer', 'source_version': '1'}

self.assertEqual({}, utils.fix_datagrepper_message(original_message))
self.assertEqual({'source_name': 'datanommer', 'source_version': '1'}, original_message)


class ValidatePolicyTests(unittest.TestCase):
"""Tests for :func:`utils.validate_policy`."""

Expand Down