Skip to content
This repository has been archived by the owner on Jan 8, 2024. It is now read-only.

Commit

Permalink
Fix handling of module doc comments in main source file (#67)
Browse files Browse the repository at this point in the history
* Add test cases for module doc comments in main source file.

* Fix handling of module doc comments in main source file

See issue #57

* Fix indentation of regular expression

* Clean up test name.

See #20
  • Loading branch information
Douglas Greiman committed Jan 29, 2018
1 parent 741dbb0 commit 0174d9e
Show file tree
Hide file tree
Showing 8 changed files with 68 additions and 38 deletions.
47 changes: 33 additions & 14 deletions compiler/python_archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,27 @@
# End boilerplate
"""

# Regex that splits the text of a main source file into two named
# groups: `before` (everything that may precede the boilerplate) and
# `after` (the remainder of the file).
#
# Boilerplate must be after the last __future__ import. See
# https://docs.python.org/2/reference/simple_stmts.html#future
#
# How the pattern is put together:
#   * `(?sx)` turns on DOTALL (`.` also matches line ends) and VERBOSE
#     (unescaped whitespace and `# ...` text inside the pattern are
#     ignored).  Because of VERBOSE, a literal `#` is written `[#]`.
#   * The pattern is a regular (non-raw) string literal, so `\\r`,
#     `\\n` and `\\s` reach the regex engine as `\r`, `\n` and `\s`,
#     and `\'\'\'` is an escaped triple single quote that does not end
#     the literal.
#   * `before` is zero or more repetitions of one skippable item (a
#     comment, whitespace, a `from __future__ import` line, or one of
#     four quoted-string forms) followed by at least one CR/LF.
#   * `after` is `.*`, which under DOTALL consumes the rest of the text,
#     so the two groups together cover the whole input.
#
# NOTE(review): because DOTALL is on, the lazy `.*?` in the
# single-character-quote forms 1 and 2 can also extend across line
# ends -- confirm that this is intended.
_boilerplate_insertion_regex = re.compile('''(?sx)
(?P<before>
(
(
([#][^\\r\\n]*) | # comment
(\\s*) | # whitespace
(from\\s+__future__\\s+import\\s+[^\\r\\n]+) | # future import
('[^'].*?') | # module doc comment form 1
("[^"].*?") | # module doc comment form 2
(\'\'\'.*?(\'\'\')) | # module doc comment form 3
(""".*?""") # module doc comment form 4
)
[\\r\\n]+ # end of line(s) for Mac, Unix and/or Windows
)*
)
# Boilerplate is inserted here
(?P<after>.*)
''')

# Fully qualified names of subpar packages
_subpar_package = 'subpar'
_compiler_package = _subpar_package + '.compiler'
Expand Down Expand Up @@ -159,25 +180,23 @@ def generate_main(self, main_filename, boilerplate_contents):
# Read main source file, in unknown encoding. We use latin-1
# here, but any single-byte encoding that doesn't raise errors
# would work.
output_lines = []
with io.open(main_filename, 'rt', encoding='latin-1') as main_file:
output_lines = list(main_file)
original_content = main_file.read()

# Find a good place to insert the boilerplate, which is the
# first line that is not a comment, blank line, or future
# import.
skip_regex = re.compile(
'''(#.*)|(\\s+)|(from\\s+__future__\\s+import)''')
idx = 0
while idx < len(output_lines):
if not skip_regex.match(output_lines[idx]):
break
idx += 1
# first line that is not a comment, blank line, doc comment,
# or future import.
match = re.match(_boilerplate_insertion_regex, original_content)
assert match, original_content
assert (len(match.group('before')) + len(match.group('after'))) == \
len(original_content), (match, original_content)
new_content = (match.group('before') +
boilerplate_contents +
match.group('after'))

# Insert boilerplate (might be beginning, middle or end)
output_lines[idx:idx] = [boilerplate_contents]
contents = ''.join(output_lines).encode('latin-1')
return stored_resource.StoredContent('__main__.py', contents)
encoded_content = new_content.encode('latin-1')
return stored_resource.StoredContent('__main__.py', encoded_content)

def scan_manifest(self, manifest):
"""Return a dict of StoredResources based on an input manifest.
Expand Down
9 changes: 9 additions & 0 deletions compiler/python_archive_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,15 @@ def test_generate_main(self):
# Future import
(b'from __future__ import print_function\n',
b'from __future__ import print_function\nBOILERPLATE\n'),
# Module docstrings
(b"'Single-quote Module docstring'\n",
b"'Single-quote Module docstring'\nBOILERPLATE\n"),
(b'"Double-quote Module docstring"\n',
b'"Double-quote Module docstring"\nBOILERPLATE\n'),
(b"'''Triple-single-quote module \"'\n\n docstring'''\n",
b"'''Triple-single-quote module \"'\n\n docstring'''\nBOILERPLATE\n"),
(b'"""Triple-double-quote module "\'\n\n docstring"""\n',
b'"""Triple-double-quote module "\'\n\n docstring"""\nBOILERPLATE\n'),
]
for main_content, expected in cases:
with test_utils.temp_file(main_content) as main_file:
Expand Down
8 changes: 4 additions & 4 deletions tests/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,9 @@ par_binary(
)

par_binary(
name = "package_g/g",
srcs = ["package_g/g.py"],
main = "package_g/g.py",
name = "package_boilerplate/main",
srcs = ["package_boilerplate/main.py"],
main = "package_boilerplate/main.py",
srcs_version = "PY2AND3",
)

Expand Down Expand Up @@ -179,7 +179,7 @@ par_binary(
"tests/package_import_roots/import_roots",
),
("indirect_dependency", "//tests:package_c/c", "tests/package_c/c"),
("main_boilerplate", "//tests:package_g/g", "tests/package_g/g"),
("main_boilerplate", "//tests:package_boilerplate/main", "tests/package_boilerplate/main"),
("pkg_resources", "//tests:package_pkg_resources/main", "tests/package_pkg_resources/main"),
("shadow", "//tests:package_shadow/main", "tests/package_shadow/main"),
("version", "//tests:package_f/f", "tests/package_f/f"),
Expand Down
10 changes: 6 additions & 4 deletions tests/package_g/g.py → tests/package_boilerplate/main.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
#!/usr/bin/env python3
# -*- coding: latin-1

# Test __future__ imports
from __future__ import print_function

"""Integration test program G for Subpar.
# Test module docstring before boilerplate insertion
"""Integration test program for Subpar.
Test bootstrap interaction with __future__ imports and source file encodings.
"""

# Test __future__ imports
from __future__ import print_function


# Test the source file encoding specification above. See PEP 263 for
# details. In the line below, this source file contains a byte
# sequence that is valid latin-1 but not valid utf-8. Specifically,
Expand Down
8 changes: 8 additions & 0 deletions tests/package_boilerplate/main_PY2_filelist.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
__main__.py
subpar/__init__.py
subpar/runtime/__init__.py
subpar/runtime/support.py
subpar/tests/__init__.py
subpar/tests/package_boilerplate/__init__.py
subpar/tests/package_boilerplate/main
subpar/tests/package_boilerplate/main.py
8 changes: 8 additions & 0 deletions tests/package_boilerplate/main_PY3_filelist.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
__main__.py
subpar/__init__.py
subpar/runtime/__init__.py
subpar/runtime/support.py
subpar/tests/__init__.py
subpar/tests/package_boilerplate/__init__.py
subpar/tests/package_boilerplate/main
subpar/tests/package_boilerplate/main.py
8 changes: 0 additions & 8 deletions tests/package_g/g_PY2_filelist.txt

This file was deleted.

8 changes: 0 additions & 8 deletions tests/package_g/g_PY3_filelist.txt

This file was deleted.

0 comments on commit 0174d9e

Please sign in to comment.