89 changes: 84 additions & 5 deletions emar.py
@@ -6,19 +6,98 @@

"""Archive helper script

This script is a simple wrapper around llvm-ar. It used to have special
handling for duplicate basenames in order to allow bitcode linking process to
read such files. This is now handled by using tools/arfile.py to read archives.
This script acts as a frontend replacement for `ar`. See emcc.
This is needed because, unlike a traditional linker, emscripten can't handle
archives with duplicate member names. This is because emscripten extracts
archives to a temporary location, and duplicate filenames would clobber each
other there.
"""

# TODO(sbc): Implement `ar x` within emscripten, in python, to avoid this issue
# and delete this file.

from __future__ import print_function
import hashlib
import os
import shutil
import sys

from tools.toolchain_profiler import ToolchainProfiler
from tools import shared
from tools.response_file import substitute_response_files, create_response_file

if __name__ == '__main__':
  ToolchainProfiler.record_process_start()


#
# Main run() function
#
def run():
  newargs = [shared.LLVM_AR] + sys.argv[1:]
  return shared.run_process(newargs, stdin=sys.stdin, check=False).returncode
  args = substitute_response_files(sys.argv)
  newargs = [shared.LLVM_AR] + args[1:]
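  # substitute_response_files has already expanded any @rsp-file arguments
  # above, so the logic below always sees the real member paths.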

  to_delete = []

  # The 3-argument form of ar doesn't involve other input files. For example
  # 'ar x libfoo.a'.
  if len(newargs) > 3:
    cmd = newargs[1]
    if 'r' in cmd:
      # We are adding files to the archive.
      # Normally the output file is then arg 2, except in the case where the
      # 'a' or 'b' modifiers are used, in which case it's arg 3.
      if 'a' in cmd or 'b' in cmd:
        out_arg_index = 3
      else:
        out_arg_index = 2
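      # For example, 'ar rcs libfoo.a foo.o' names the archive as arg 2, but
      # 'ar ra relpos libfoo.a foo.o' takes a relpos member name first, which
      # pushes the archive to arg 3.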

      contents = set()
      if os.path.exists(newargs[out_arg_index]):
        cmd = [shared.LLVM_AR, 't', newargs[out_arg_index]]
        output = shared.check_call(cmd, stdout=shared.PIPE).stdout
        contents.update(output.split('\n'))
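      # 'ar t' prints one member name per line; seeding `contents` with the
      # existing members means new additions can't collide with them either.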

      # Add a hash to colliding basenames, to make them unique.
      for j in range(out_arg_index + 1, len(newargs)):
        orig_name = newargs[j]
        full_name = os.path.abspath(orig_name)
        dirname = os.path.dirname(full_name)
        basename = os.path.basename(full_name)
        if basename not in contents:
          contents.add(basename)
          continue
        h = hashlib.md5(full_name.encode('utf-8')).hexdigest()[:8]
        parts = basename.split('.')
        parts[0] += '_' + h
        newname = '.'.join(parts)
        full_newname = os.path.join(dirname, newname)
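        # Illustrative example (hash value made up): b/common.o becomes
        # b/common_1b2c3d4e.o before being added to the archive.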
        assert not os.path.exists(full_newname)
        try:
          shutil.copyfile(orig_name, full_newname)
          newargs[j] = full_newname
          to_delete.append(full_newname)
          contents.add(newname)
        except OSError:
          # It is OK to fail here; we just don't get the hash-renaming.
          contents.add(basename)

  if shared.DEBUG:
    print('emar:', sys.argv, ' ==> ', newargs, file=sys.stderr)

  response_filename = create_response_file(newargs[3:], shared.get_emscripten_temp_dir())
  to_delete += [response_filename]
  newargs = newargs[:3] + ['@' + response_filename]
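  # Passing the member list via a response file keeps the llvm-ar command
  # line short, avoiding platform limits on argument length.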

  if shared.DEBUG:
    print('emar:', sys.argv, ' ==> ', newargs, file=sys.stderr)

  try:
    return shared.run_process(newargs, stdin=sys.stdin, check=False).returncode
  finally:
    for d in to_delete:
      shared.try_delete(d)


if __name__ == '__main__':
  sys.exit(run())
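
For reference, the renaming scheme above can be read in isolation. The sketch
below is illustrative and not part of the change; the helper name
uniquify_basenames is made up, and it only computes the new paths rather than
copying any files:

import hashlib
import os


def uniquify_basenames(paths):
  """Return paths with unique basenames, renaming collisions the way emar.py
  does: an 8-hex-digit md5 of the absolute path is spliced into the first
  dot-separated component of the colliding name."""
  seen = set()
  out = []
  for path in paths:
    full = os.path.abspath(path)
    dirname, base = os.path.split(full)
    if base not in seen:
      seen.add(base)
      out.append(path)
      continue
    h = hashlib.md5(full.encode('utf-8')).hexdigest()[:8]
    parts = base.split('.')
    parts[0] += '_' + h
    newbase = '.'.join(parts)
    seen.add(newbase)
    out.append(os.path.join(dirname, newbase))
  return out


# uniquify_basenames(['a/common.o', 'b/common.o'])
# -> ['a/common.o', '/abs/cwd/b/common_<hash>.o']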
14 changes: 8 additions & 6 deletions tests/test_core.py
Expand Up @@ -8,6 +8,7 @@
import hashlib
import json
import os
import random
import re
import shutil
import sys
@@ -5163,25 +5164,26 @@ def test_iostream_and_determinism(self):
return 0;
}
'''
    num = 3
    num = 5

    def test():
      print('(iteration)')
      time.sleep(1.0)
      time.sleep(random.random() / (10 * num)) # add some timing nondeterminism here, not that we need it, but whatever
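      # If timestamps or other time-dependent state ever leaked into the
      # output, the varying delay between iterations would show up as a byte
      # difference between builds.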
      self.do_run(src, 'hello world\n77.\n')
      ret = open('src.cpp.o.js', 'rb').read()
      if self.get_setting('WASM') and not self.get_setting('WASM2JS'):
        ret += open('src.cpp.o.wasm', 'rb').read()
      return ret

    builds = [test() for i in range(num)]
    print([len(b) for b in builds])
    print(list(map(len, builds)))
    uniques = set(builds)
    if len(uniques) != 1:
      for i, unique in enumerate(uniques):
      i = 0
      for unique in uniques:
        open('unique_' + str(i) + '.js', 'wb').write(unique)
      # builds must be deterministic, see unique_N.js
      self.assertEqual(len(uniques), 1)
        i += 1
      assert 0, 'builds must be deterministic, see unique_X.js'

  def test_stdvec(self):
    self.do_run_in_out_file_test('tests', 'core', 'test_stdvec')
37 changes: 32 additions & 5 deletions tests/test_other.py
@@ -1450,10 +1450,16 @@ def test_archive_duplicate_basenames(self):
    ''')
    run_process([PYTHON, EMCC, os.path.join('b', 'common.c'), '-c', '-o', os.path.join('b', 'common.o')])

    try_delete('libdup.a')
    run_process([PYTHON, EMAR, 'rc', 'libdup.a', os.path.join('a', 'common.o'), os.path.join('b', 'common.o')])
    text = run_process([PYTHON, EMAR, 't', 'libdup.a'], stdout=PIPE).stdout
    self.assertEqual(text.count('common.o'), 2)
    try_delete('liba.a')
    run_process([PYTHON, EMAR, 'rc', 'liba.a', os.path.join('a', 'common.o'), os.path.join('b', 'common.o')])

    # Verify that the archive contains basenames with hashes, to avoid duplication
    text = run_process([PYTHON, EMAR, 't', 'liba.a'], stdout=PIPE).stdout
    self.assertEqual(text.count('common.o'), 1)
    self.assertContained('common_', text)
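    # Only one member keeps the plain name; the colliding member gets a short
    # hash suffix, so 'common.o' appears once and 'common_' appears in the
    # renamed entry.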
    for line in text.split('\n'):
      # should not have huge hash names
      self.assertLess(len(line), 20, line)

    create_test_file('main.c', r'''
void a(void);
@@ -1463,9 +1469,30 @@ def test_archive_duplicate_basenames(self):
b();
}
    ''')
    run_process([PYTHON, EMCC, 'main.c', '-L.', '-ldup'])
    err = run_process([PYTHON, EMCC, 'main.c', '-L.', '-la'], stderr=PIPE).stderr
    self.assertNotIn('archive file contains duplicate entries', err)
    self.assertContained('a\nb...\n', run_js('a.out.js'))

    # Using llvm-ar directly should cause duplicate basenames
    try_delete('libdup.a')
    run_process([LLVM_AR, 'rc', 'libdup.a', os.path.join('a', 'common.o'), os.path.join('b', 'common.o')])
    text = run_process([PYTHON, EMAR, 't', 'libdup.a'], stdout=PIPE).stdout
    assert text.count('common.o') == 2, text
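    # The ar format itself is fine with this: members are addressed by
    # position, not by name, so llvm-ar happily stores both entries.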

    # With fastcomp we don't support duplicate members, so this should
    # generate a warning. With the wasm backend (lld) this is fully supported.
    cmd = [PYTHON, EMCC, 'main.c', '-L.', '-ldup']
    if self.is_wasm_backend():
      run_process(cmd)
      self.assertContained('a\nb...\n', run_js('a.out.js'))
    else:
      err = self.expect_fail(cmd)
      self.assertIn('libdup.a: archive file contains duplicate entries', err)
      self.assertIn('error: undefined symbol: a', err)
      # others are not duplicates - the hashing keeps them separate
      self.assertEqual(err.count('duplicate: '), 1)
      self.assertContained('a\nb...\n', run_js('a.out.js'))

  def test_export_from_archive(self):
    export_name = 'this_is_an_entry_point'
    full_export_name = '_' + export_name
191 changes: 0 additions & 191 deletions tools/arfile.py

This file was deleted.
