diff --git a/compiler/python_archive.py b/compiler/python_archive.py index 3dfdba0..822c9de 100755 --- a/compiler/python_archive.py +++ b/compiler/python_archive.py @@ -50,6 +50,27 @@ # End boilerplate """ +# Boilerplate must be after the last __future__ import. See +# https://docs.python.org/2/reference/simple_stmts.html#future +_boilerplate_insertion_regex = re.compile('''(?sx) +(?P<before> + ( + ( + ([#][^\\r\\n]*) | # comment + (\\s*) | # whitespace + (from\\s+__future__\\s+import\\s+[^\\r\\n]+) | # future import + ('[^'].*?') | # module doc comment form 1 + ("[^"].*?") | # module doc comment form 2 + (\'\'\'.*?(\'\'\')) | # module doc comment form 3 + (""".*?""") # module doc comment form 4 + ) + [\\r\\n]+ # end of line(s) for Mac, Unix and/or Windows + )* +) +# Boilerplate is inserted here +(?P<after>.*) +''') + # Fully qualified names of subpar packages _subpar_package = 'subpar' _compiler_package = _subpar_package + '.compiler' @@ -159,25 +180,23 @@ def generate_main(self, main_filename, boilerplate_contents): # Read main source file, in unknown encoding. We use latin-1 # here, but any single-byte encoding that doesn't raise errors # would work. - output_lines = [] with io.open(main_filename, 'rt', encoding='latin-1') as main_file: - output_lines = list(main_file) + original_content = main_file.read() # Find a good place to insert the boilerplate, which is the - # first line that is not a comment, blank line, or future - # import. - skip_regex = re.compile( - '''(#.*)|(\\s+)|(from\\s+__future__\\s+import)''') - idx = 0 - while idx < len(output_lines): - if not skip_regex.match(output_lines[idx]): - break - idx += 1 + # first line that is not a comment, blank line, doc comment, + # or future import. 
+ match = re.match(_boilerplate_insertion_regex, original_content) + assert match, original_content + assert (len(match.group('before')) + len(match.group('after'))) == \ + len(original_content), (match, original_content) + new_content = (match.group('before') + + boilerplate_contents + + match.group('after')) # Insert boilerplate (might be beginning, middle or end) - output_lines[idx:idx] = [boilerplate_contents] - contents = ''.join(output_lines).encode('latin-1') - return stored_resource.StoredContent('__main__.py', contents) + encoded_content = new_content.encode('latin-1') + return stored_resource.StoredContent('__main__.py', encoded_content) def scan_manifest(self, manifest): """Return a dict of StoredResources based on an input manifest. diff --git a/compiler/python_archive_test.py b/compiler/python_archive_test.py index 17d5049..e11c941 100644 --- a/compiler/python_archive_test.py +++ b/compiler/python_archive_test.py @@ -140,6 +140,15 @@ def test_generate_main(self): # Future import (b'from __future__ import print_function\n', b'from __future__ import print_function\nBOILERPLATE\n'), + # Module docstrings + (b"'Single-quote Module docstring'\n", + b"'Single-quote Module docstring'\nBOILERPLATE\n"), + (b'"Double-quote Module docstring"\n', + b'"Double-quote Module docstring"\nBOILERPLATE\n'), + (b"'''Triple-single-quote module \"'\n\n docstring'''\n", + b"'''Triple-single-quote module \"'\n\n docstring'''\nBOILERPLATE\n"), + (b'"""Triple-double-quote module "\'\n\n docstring"""\n', + b'"""Triple-double-quote module "\'\n\n docstring"""\nBOILERPLATE\n'), ] for main_content, expected in cases: with test_utils.temp_file(main_content) as main_file: diff --git a/tests/BUILD b/tests/BUILD index a87f306..0e72f9d 100644 --- a/tests/BUILD +++ b/tests/BUILD @@ -76,9 +76,9 @@ par_binary( ) par_binary( - name = "package_g/g", - srcs = ["package_g/g.py"], - main = "package_g/g.py", + name = "package_boilerplate/main", + srcs = ["package_boilerplate/main.py"], + main = 
"package_boilerplate/main.py", srcs_version = "PY2AND3", ) @@ -179,7 +179,7 @@ par_binary( "tests/package_import_roots/import_roots", ), ("indirect_dependency", "//tests:package_c/c", "tests/package_c/c"), - ("main_boilerplate", "//tests:package_g/g", "tests/package_g/g"), + ("main_boilerplate", "//tests:package_boilerplate/main", "tests/package_boilerplate/main"), ("pkg_resources", "//tests:package_pkg_resources/main", "tests/package_pkg_resources/main"), ("shadow", "//tests:package_shadow/main", "tests/package_shadow/main"), ("version", "//tests:package_f/f", "tests/package_f/f"), diff --git a/tests/package_g/g.py b/tests/package_boilerplate/main.py similarity index 87% rename from tests/package_g/g.py rename to tests/package_boilerplate/main.py index 2006319..03ef2d6 100755 --- a/tests/package_g/g.py +++ b/tests/package_boilerplate/main.py @@ -1,14 +1,16 @@ #!/usr/bin/env python3 # -*- coding: latin-1 -# Test __future__ imports -from __future__ import print_function - -"""Integration test program G for Subpar. +# Test module docstring before boilerplate insertion +"""Integration test program for Subpar. Test bootstrap interaction with __future__ imports and source file encodings. """ +# Test __future__ imports +from __future__ import print_function + + # Test the source file encoding specification above. See PEP 263 for # details. In the line below, this source file contains a byte # sequence that is valid latin-1 but not valid utf-8. 
Specifically, diff --git a/tests/package_boilerplate/main_PY2_filelist.txt b/tests/package_boilerplate/main_PY2_filelist.txt new file mode 100644 index 0000000..0b93076 --- /dev/null +++ b/tests/package_boilerplate/main_PY2_filelist.txt @@ -0,0 +1,8 @@ +__main__.py +subpar/__init__.py +subpar/runtime/__init__.py +subpar/runtime/support.py +subpar/tests/__init__.py +subpar/tests/package_boilerplate/__init__.py +subpar/tests/package_boilerplate/main +subpar/tests/package_boilerplate/main.py diff --git a/tests/package_boilerplate/main_PY3_filelist.txt b/tests/package_boilerplate/main_PY3_filelist.txt new file mode 100644 index 0000000..0b93076 --- /dev/null +++ b/tests/package_boilerplate/main_PY3_filelist.txt @@ -0,0 +1,8 @@ +__main__.py +subpar/__init__.py +subpar/runtime/__init__.py +subpar/runtime/support.py +subpar/tests/__init__.py +subpar/tests/package_boilerplate/__init__.py +subpar/tests/package_boilerplate/main +subpar/tests/package_boilerplate/main.py diff --git a/tests/package_g/g_PY2_filelist.txt b/tests/package_g/g_PY2_filelist.txt deleted file mode 100644 index b58fe85..0000000 --- a/tests/package_g/g_PY2_filelist.txt +++ /dev/null @@ -1,8 +0,0 @@ -__main__.py -subpar/__init__.py -subpar/runtime/__init__.py -subpar/runtime/support.py -subpar/tests/__init__.py -subpar/tests/package_g/__init__.py -subpar/tests/package_g/g -subpar/tests/package_g/g.py diff --git a/tests/package_g/g_PY3_filelist.txt b/tests/package_g/g_PY3_filelist.txt deleted file mode 100644 index b58fe85..0000000 --- a/tests/package_g/g_PY3_filelist.txt +++ /dev/null @@ -1,8 +0,0 @@ -__main__.py -subpar/__init__.py -subpar/runtime/__init__.py -subpar/runtime/support.py -subpar/tests/__init__.py -subpar/tests/package_g/__init__.py -subpar/tests/package_g/g -subpar/tests/package_g/g.py