Skip to content

Commit

Permalink
Ensure byte-for-byte results when reading and then writing a file.
Browse files Browse the repository at this point in the history
When reading a DiffX file using the DOM and then writing it back out,
the contents of the file would change. Since DiffX files are intended to
be a mutable format, we don't want to make any unwanted changes.

There were a few problems leading to this:

1) When reading into a file into a `DiffX` object, default options would
   be preserved. This is really a problem for the main `encoding`
   option, which would default to UTF-8 if missing in the DiffX file
   content, and would then be written back out. Now, options are cleared
   out before they're set.

2) Similarly, if an explicit main `encoding` value wasn't provided, the
   default in `DiffXWriter` would be used. We now explicitly pull out
   the `encoding` option, defaulting to `None` if not found in the file,
   and pass it in to `DiffXWriter`.

2) `DiffXWriter` was hard-coding the version being set, and the DOM
   therefore wasn't setting a version when writing. Now it will accept
   a provided version. While not important right now, this will be
   important for compatibility with future spec versions. It's up to
   the caller to ensure the version being provided is valid.

Unit tests were added to ensure that we now get byte-for-byte results.

Testing Done:
Unit tests pass.

Reviewed at https://reviews.reviewboard.org/r/11763/
  • Loading branch information
chipx86 committed Aug 1, 2021
1 parent 6246b15 commit d88e782
Show file tree
Hide file tree
Showing 8 changed files with 213 additions and 14 deletions.
6 changes: 3 additions & 3 deletions python/pydiffx/dom/properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
from pydiffx.options import (DiffType,
LineEndings,
MetaFormat,
PreambleMimeType)
from pydiffx.writer import DiffXWriter
PreambleMimeType,
SpecVersion)


class OptionProperty(object):
Expand Down Expand Up @@ -189,7 +189,7 @@ class VersionOptionProperty(OptionProperty):

option_name = 'version'
data_type = six.text_type
choices = (DiffXWriter.VERSION,)
choices = SpecVersion.VALID_VALUES


class PreambleIndentOptionProperty(OptionProperty):
Expand Down
2 changes: 2 additions & 0 deletions python/pydiffx/dom/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ def _read_main_section(self, diffx, section, section_info):
section_info (dict):
Information on the section from the streaming reader.
"""
diffx.options.clear()
diffx.options.update(section_info['options'])

def _read_change_section(self, diffx, section, section_info):
Expand Down Expand Up @@ -209,4 +210,5 @@ def _read_file_section(self, diffx, section, section_info):
def _set_content_options(self, section, options):
options.pop('length', None)

section.options.clear()
section.options.update(options)
9 changes: 7 additions & 2 deletions python/pydiffx/dom/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,14 @@ def write_stream(self, diffx, stream):
the error message.
"""
main_options = diffx.options.copy()
main_options.pop('version', None)

writer = self.writer_cls(stream, **main_options)
version = main_options.pop('version', DiffXWriter.VERSION)
encoding = main_options.pop('encoding', None)

writer = self.writer_cls(stream,
version=version,
encoding=encoding,
**main_options)

for subsection in diffx:
self._write_section(subsection, writer)
Expand Down
16 changes: 16 additions & 0 deletions python/pydiffx/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ class MetaFormat(object):
JSON,
}


class PreambleMimeType(object):
"""Mimetypes available for a preamble section.
Expand All @@ -72,3 +73,18 @@ class PreambleMimeType(object):
MARKDOWN,
PLAIN,
}


class SpecVersion(object):
"""Supported specification versions.
These may be used as the DiffX ``version`` option.
"""

#: The default version to write.
DEFAULT_VERSION = '1.0'

#: A set of values allowed for the version option.
VALID_VALUES = {
'1.0',
}
6 changes: 2 additions & 4 deletions python/pydiffx/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import six

from pydiffx.errors import DiffXParseError
from pydiffx.options import SpecVersion
from pydiffx.sections import (CONTENT_SECTIONS,
META_SECTIONS,
PREAMBLE_SECTIONS,
Expand Down Expand Up @@ -37,9 +38,6 @@ class DiffXReader(object):
during iteration.
"""

#: The list of DiffX specification versions supported.
SUPPORTED_VERSIONS = {'1.0'}

_HEADER_OPTION_KEY_RE = re.compile(br'[A-Za-z][A-Za-z0-9_-]*')
_HEADER_OPTION_VALUE_RE = re.compile(br'[A-Za-z0-9_.-]+')
_HEADER_RE = re.compile(
Expand Down Expand Up @@ -248,7 +246,7 @@ def iter_sections(self):
# Validate the DiffX version.
diffx_version = options.get('version')

if diffx_version not in self.SUPPORTED_VERSIONS:
if diffx_version not in SpecVersion.VALID_VALUES:
raise DiffXParseError(
'The DiffX version in this file (%s) is not '
'supported by this version of the diffx module'
Expand Down
151 changes: 151 additions & 0 deletions python/pydiffx/tests/test_dom_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,157 @@ def test_from_bytes_with_complex_diff(self):
'line_endings': LineEndings.DOS,
})

def test_from_bytes_with_no_diffx_encoding(self):
"""Testing DiffX.from_bytes with a diffx without a main encoding"""
diffx = DiffX.from_bytes(
b'#diffx: version=1.0\n'
b'#.change:\n'
b'#..file:\n'
b'#...meta: format=json, length=82\n'
b'{\n'
b' "path": {\n'
b' "new": "message2.py",\n'
b' "old": "message.py"\n'
b' }\n'
b'}\n'
b'#...diff: length=50\n'
b'--- file\n'
b'+++ file\n'
b'@@ -1 +1 @@\n'
b'-old line\n'
b'+new line\n'
)

self.assertEqual(diffx.options, {
'version': '1.0',
})
self.assertIsNone(diffx.preamble)
self.assertEqual(diffx.preamble_section.options, {})
self.assertEqual(diffx.meta, {})
self.assertEqual(diffx.meta_section.options, {
'format': MetaFormat.JSON,
})
self.assertEqual(len(diffx.changes), 1)

# Change #1
change = diffx.changes[0]
self.assertEqual(change.options, {})
self.assertIsNone(change.preamble)
self.assertEqual(change.preamble_section.options, {})
self.assertEqual(change.meta, {})
self.assertEqual(change.meta_section.options, {
'format': MetaFormat.JSON,
})
self.assertEqual(len(change.files), 1)

# Change #1, file #1
file = change.files[0]
self.assertEqual(file.meta, {
'path': {
'new': 'message2.py',
'old': 'message.py',
},
})
self.assertEqual(file.meta_section.options, {
'format': MetaFormat.JSON,
})
self.assertEqual(
file.diff,
b'--- file\n'
b'+++ file\n'
b'@@ -1 +1 @@\n'
b'-old line\n'
b'+new line\n')
self.assertEqual(file.diff_section.options, {})

def test_from_bytes_to_bytes_preserves_content(self):
"""Testing DiffX.from_bytes followed by to_bytes results in
byte-for-byte reproduction
"""
diff_content = (
b'#diffx: version=1.0\n'
b'#.preamble: encoding=ascii, indent=2, length=36,'
b' line_endings=dos, mimetype=text/plain\n'
b' This is the file-level preamble.\r\n'
b'#.meta: encoding=utf-32, format=json, length=96\n'
b'\xff\xfe\x00\x00{\x00\x00\x00\n\x00\x00\x00'
b' \x00\x00\x00 \x00\x00\x00 \x00\x00\x00 \x00\x00\x00"'
b'\x00\x00\x00k\x00\x00\x00e\x00\x00\x00y\x00\x00\x00"'
b'\x00\x00\x00:\x00\x00\x00 \x00\x00\x00"\x00\x00\x00v'
b'\x00\x00\x00a\x00\x00\x00l\x00\x00\x00u\x00\x00\x00e'
b'\x00\x00\x00"\x00\x00\x00\n\x00\x00\x00}\x00\x00\x00'
b'\n\x00\x00\x00'
b'#.change: encoding=utf-16\n'
b'#..preamble: indent=2, length=14, line_endings=unix, '
b'mimetype=text/markdown\n'
b' \xff\xfet\x00e\x00s\x00t\x00\n\x00'
b'#..meta: encoding=utf-8, format=json, length=244\n'
b'{\n'
b' "author": "Test User <test@example.com>",\n'
b' "committer": "Test User <test@example.com>",\n'
b' "committer date": "2021-06-02T13:12:06-07:00",\n'
b' "date": "2021-06-01T19:26:31-07:00",\n'
b' "id": "a25e7b28af5e3184946068f432122c68c1a30b23"\n'
b'}\n'
b'#..file:\n'
b'#...meta: encoding=latin1, format=json, length=166\n'
b'{\n'
b' "path": "file1",\n'
b' "revision": {\n'
b' "new": "eed8df7f1400a95cdf5a87ddb947e7d9c5a19cef",\n'
b' "old": "c8839177d1a5605aa60abe69db95c84183f0eebe"\n'
b' }\n'
b'}\n'
b'#...diff: length=60, line_endings=unix\n'
b'--- /file1\n'
b'+++ /file1\n'
b'@@ -498,7 +498,7 @@\n'
b' ... diff content\n'
b'#.change: encoding=utf-32\n'
b'#..preamble: encoding=utf-8, indent=4, length=56, '
b'line_endings=unix\n'
b' Summary of commit #2\n'
b' \n'
b' Here\'s a description.\n'
b'#..meta: encoding=utf-8, format=json, length=244\n'
b'{\n'
b' "author": "Test User <test@example.com>",\n'
b' "committer": "Test User <test@example.com>",\n'
b' "committer date": "2021-06-02T19:46:25-07:00",\n'
b' "date": "2021-06-01T19:46:22-07:00",\n'
b' "id": "91127b687f583184144161f432222748c1a30b23"\n'
b'}\n'
b'#..file:\n'
b'#...meta: encoding=utf-32, format=json, length=96\n'
b'\xff\xfe\x00\x00'
b'{\x00\x00\x00\n'
b'\x00\x00\x00 \x00\x00\x00 \x00\x00\x00 \x00\x00\x00'
b' \x00\x00\x00"\x00\x00\x00k\x00\x00\x00e\x00\x00\x00'
b'y\x00\x00\x00"\x00\x00\x00:\x00\x00\x00 \x00\x00\x00'
b'"\x00\x00\x00v\x00\x00\x00a\x00\x00\x00l\x00\x00\x00'
b'u\x00\x00\x00e\x00\x00\x00"\x00\x00\x00\n'
b'\x00\x00\x00}\x00\x00\x00\n\x00\x00\x00'
b'#...diff: encoding=utf-16, length=22, line_endings=unix\n'
b'\xff\xfe \x00.\x00.\x00.\x00 \x00d\x00i\x00f\x00f\x00\n\x00'
b'#..file:\n'
b'#...meta: encoding=utf-8, format=json, length=166\n'
b'{\n'
b' "path": "file3",\n'
b' "revision": {\n'
b' "new": "0d4a0fb8d62b762a26e13591d06d93d79d61102f",\n'
b' "old": "be089b7197974703c83682088a068bef3422c6c2"\n'
b' }\n'
b'}\n'
b'#...diff: length=86, line_endings=dos\n'
b'--- a/file3\r\n'
b'+++ b/file3\r\n'
b'@@ -258,7 +258,8 @@\r\n'
b' ... diff content for commit 2, file3\r\n'
)

diffx = DiffX.from_bytes(diff_content)
self.assertMultiLineBytesEqual(diffx.to_bytes(), diff_content)

def test_meta(self):
"""Testing DiffX.meta"""
self.run_content_test({'key': 'value'},
Expand Down
17 changes: 16 additions & 1 deletion python/pydiffx/tests/test_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,16 @@ def test_with_multi_commit_diff(self):
b' ... diff content for commit 2, file3\n'
)

def test_with_unsupported_version(self):
"""Testing DiffXWriter with an unsupported version"""
message = (
'"123.456" is not a supported value for version. Expected '
'one of: 1.0'
)

with self.assertRaisesMessage(DiffXOptionValueChoiceError, message):
self._create_writer(version='123.456')

def test_with_content_crlf_and_no_line_endings(self):
"""Testing DiffXWriter with content containing CRLF newlines and no
line_endings= option
Expand Down Expand Up @@ -1001,7 +1011,12 @@ def _create_writer(self, **kwargs):
2. The writer.
"""
stream = io.BytesIO()
writer = DiffXWriter(stream, **kwargs)

try:
writer = DiffXWriter(stream, **kwargs)
except Exception:
stream.close()
raise

return stream, writer

Expand Down
20 changes: 16 additions & 4 deletions python/pydiffx/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
from pydiffx.options import (DiffType,
LineEndings,
MetaFormat,
PreambleMimeType)
PreambleMimeType,
SpecVersion)
from pydiffx.sections import Section, VALID_SECTION_STATES
from pydiffx.utils.text import (NEWLINE_FORMATS,
guess_line_endings,
Expand All @@ -36,7 +37,7 @@ class DiffXWriter(object):
"""

#: The supported version of the DiffX specification.
VERSION = '1.0'
VERSION = SpecVersion.DEFAULT_VERSION

#: Default indentation to apply to preamble sections.
DEFAULT_PREAMBLE_INDENT = 4
Expand All @@ -49,7 +50,7 @@ class DiffXWriter(object):
_LEVEL_CHANGE = 2
_LEVEL_FILE = 3

def __init__(self, fp, encoding=DEFAULT_ENCODING):
def __init__(self, fp, encoding=DEFAULT_ENCODING, version=VERSION):
"""Initialize the writer.
Args:
Expand All @@ -60,7 +61,18 @@ def __init__(self, fp, encoding=DEFAULT_ENCODING):
encoding (unicode, optional):
The default encoding for content in the file. This will
generally be left as the default of "utf-8".
version (unicode, optional):
The version of the DiffX file to write.
This must currently be ``1.0``.
"""
if version not in SpecVersion.VALID_VALUES:
raise DiffXOptionValueChoiceError(
option='version',
value=version,
choices=SpecVersion.VALID_VALUES)

self.fp = fp
self._stack = [{
'encoding': encoding,
Expand All @@ -70,7 +82,7 @@ def __init__(self, fp, encoding=DEFAULT_ENCODING):
self._new_container_section(section_name='diffx',
section_level=self._LEVEL_MAIN,
encoding=self._cur_encoding,
version=self.VERSION)
version=version)

@property
def _cur_section_level(self):
Expand Down

0 comments on commit d88e782

Please sign in to comment.