diff --git a/.gitignore b/.gitignore index 08a66ca50844c5..27f05e18c66350 100644 --- a/.gitignore +++ b/.gitignore @@ -253,3 +253,5 @@ Release/ /contrib/buildsystems/out /contrib/libgit-rs/target /contrib/libgit-sys/target + +.idea/ diff --git a/git-p4.py b/git-p4.py index c0ca7becaf4861..72a4c55f99eb39 100755 --- a/git-p4.py +++ b/git-p4.py @@ -234,67 +234,91 @@ def encode_text_stream(s): class MetadataDecodingException(Exception): - def __init__(self, input_string): + def __init__(self, input_string, error=None): self.input_string = input_string + self.error = error def __str__(self): - return """Decoding perforce metadata failed! + message = """Decoding perforce metadata failed! The failing string was: --- {} --- Consider setting the git-p4.metadataDecodingStrategy config option to 'fallback', to allow metadata to be decoded using a fallback encoding, -defaulting to cp1252.""".format(self.input_string) +defaulting to cp1252.""" + if verbose and self.error is not None: + message += """ +--- +Error: +--- +{}""" + return message.format(self.input_string, self.error) -encoding_fallback_warning_issued = False -encoding_escape_warning_issued = False -def metadata_stream_to_writable_bytes(s): - encodingStrategy = gitConfig('git-p4.metadataDecodingStrategy') or defaultMetadataDecodingStrategy - fallbackEncoding = gitConfig('git-p4.metadataFallbackEncoding') or defaultFallbackMetadataEncoding - if not isinstance(s, bytes): - return s.encode('utf_8') - if encodingStrategy == 'passthrough': - return s - try: - s.decode('utf_8') - return s - except UnicodeDecodeError: - if encodingStrategy == 'fallback' and fallbackEncoding: - global encoding_fallback_warning_issued - global encoding_escape_warning_issued - try: - if not encoding_fallback_warning_issued: - print("\nCould not decode value as utf-8; using configured fallback encoding %s: %s" % (fallbackEncoding, s)) - print("\n(this warning is only displayed once during an import)") - encoding_fallback_warning_issued = True - return s.decode(fallbackEncoding).encode('utf_8') - except Exception as exc: - if not encoding_escape_warning_issued: - print("\nCould not decode value with configured fallback encoding %s; escaping bytes over 127: %s" % (fallbackEncoding, s)) - print("\n(this warning is only displayed once during an import)") - encoding_escape_warning_issued = True - escaped_bytes = b'' - # bytes and strings work very differently in python2 vs python3... - if str is bytes: - for byte in s: - byte_number = struct.unpack('>B', byte)[0] - if byte_number > 127: - escaped_bytes += b'%' - escaped_bytes += hex(byte_number)[2:].upper() - else: - escaped_bytes += byte - else: - for byte_number in s: - if byte_number > 127: - escaped_bytes += b'%' - escaped_bytes += hex(byte_number).upper().encode()[2:] - else: - escaped_bytes += bytes([byte_number]) - return escaped_bytes +class MetadataTranscoder: + def __init__(self, default_metadata_decoding_strategy, default_fallback_metadata_encoding): + self.decoding_fallback_warning_issued = False + self.decoding_escape_warning_issued = False + self.decodingStrategy = gitConfig('git-p4.metadataDecodingStrategy') or default_metadata_decoding_strategy + self.fallbackEncoding = gitConfig('git-p4.metadataFallbackEncoding') or default_fallback_metadata_encoding + + def decode_metadata(self, s, error_from_fallback=True): + try: + return [s.decode('utf_8'), 'utf_8'] + except UnicodeDecodeError as decode_exception: + error = decode_exception + if self.decodingStrategy == 'fallback' and self.fallbackEncoding: + try: + if not self.decoding_fallback_warning_issued: + print("\nCould not decode value as utf-8; using configured fallback encoding %s: %s" % (self.fallbackEncoding, s)) + print("\n(this warning is only displayed once during an import)") + self.decoding_fallback_warning_issued = True + return [s.decode(self.fallbackEncoding), self.fallbackEncoding] + except Exception as decode_exception: + if not error_from_fallback: + return [s, None] + error = decode_exception + raise MetadataDecodingException(s, error) + + def metadata_stream_to_writable_bytes(self, s): + if not isinstance(s, bytes): + return s.encode('utf_8') + if self.decodingStrategy == 'passthrough': + return s + + [text, encoding] = self.decode_metadata(s, False) + if encoding == 'utf_8': + # s is of utf-8 already + return s + + if encoding is None: + # could not decode s, even with fallback encoding + if not self.decoding_escape_warning_issued: + print("\nCould not decode value with configured fallback encoding %s; escaping bytes over 127: %s" % (self.fallbackEncoding, s)) + print("\n(this warning is only displayed once during an import)") + self.decoding_escape_warning_issued = True + escaped_bytes = b'' + # bytes and strings work very differently in python2 vs python3... + if str is bytes: + for byte in s: + byte_number = struct.unpack('>B', byte)[0] + if byte_number > 127: + escaped_bytes += b'%' + escaped_bytes += hex(byte_number)[2:].upper() + else: + escaped_bytes += byte + else: + for byte_number in s: + if byte_number > 127: + escaped_bytes += b'%' + escaped_bytes += hex(byte_number).upper().encode()[2:] + else: + escaped_bytes += bytes([byte_number]) + return escaped_bytes - raise MetadataDecodingException(s) + # were able to decode but not to utf-8 + return text.encode('utf_8') def decode_path(path): @@ -898,14 +922,14 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False, decoded_entry[key] = value # Parse out data if it's an error response if decoded_entry.get('code') == 'error' and 'data' in decoded_entry: - decoded_entry['data'] = decoded_entry['data'].decode() + decoded_entry['data'] = metadataTranscoder.decode_metadata(decoded_entry['data']) entry = decoded_entry if skip_info: if 'code' in entry and entry['code'] == 'info': continue for key in p4KeysContainingNonUtf8Chars(): if key in entry: - entry[key] = metadata_stream_to_writable_bytes(entry[key]) + entry[key] = metadataTranscoder.metadata_stream_to_writable_bytes(entry[key]) if cb is not None: cb(entry) else: @@ -1718,7 +1742,7 @@ def getUserMapFromPerforceServer(self): # python2 or python3. To support # git-p4.metadataDecodingStrategy=fallback, self.users dict values # are always bytes, ready to be written to git. - emailbytes = metadata_stream_to_writable_bytes(output["Email"]) + emailbytes = metadataTranscoder.metadata_stream_to_writable_bytes(output["Email"]) self.users[output["User"]] = output["FullName"] + b" <" + emailbytes + b">" self.emails[output["Email"]] = output["User"] @@ -1730,12 +1754,12 @@ def getUserMapFromPerforceServer(self): fullname = mapUser[0][1] email = mapUser[0][2] fulluser = fullname + " <" + email + ">" - self.users[user] = metadata_stream_to_writable_bytes(fulluser) + self.users[user] = metadataTranscoder.metadata_stream_to_writable_bytes(fulluser) self.emails[email] = user s = b'' for (key, val) in self.users.items(): - keybytes = metadata_stream_to_writable_bytes(key) + keybytes = metadataTranscoder.metadata_stream_to_writable_bytes(key) s += b"%s\t%s\n" % (keybytes.expandtabs(1), val.expandtabs(1)) open(self.getUserCacheFilename(), 'wb').write(s) @@ -3349,7 +3373,7 @@ def make_email(self, userid): if userid in self.users: return self.users[userid] else: - userid_bytes = metadata_stream_to_writable_bytes(userid) + userid_bytes = metadataTranscoder.metadata_stream_to_writable_bytes(userid) return b"%s " % userid_bytes def streamTag(self, gitStream, labelName, labelDetails, commit, epoch): @@ -4561,6 +4585,7 @@ def printUsage(commands): "unshelve": P4Unshelve, } +metadataTranscoder = MetadataTranscoder(defaultMetadataDecodingStrategy, defaultFallbackMetadataEncoding) def main(): if len(sys.argv[1:]) == 0: diff --git a/t/meson.build b/t/meson.build index a59da26be3f471..656424fdff35f1 100644 --- a/t/meson.build +++ b/t/meson.build @@ -1090,6 +1090,7 @@ integration_tests = [ 't9834-git-p4-file-dir-bug.sh', 't9835-git-p4-metadata-encoding-python2.sh', 't9836-git-p4-metadata-encoding-python3.sh', + 't9837-git-p4-error-encoding.sh', 't9850-shell.sh', 't9901-git-web--browse.sh', 't9902-completion.sh', diff --git a/t/t9837-git-p4-error-encoding.sh b/t/t9837-git-p4-error-encoding.sh new file mode 100755 index 00000000000000..1ea774afb1befd --- /dev/null +++ b/t/t9837-git-p4-error-encoding.sh @@ -0,0 +1,53 @@ +#!/bin/sh + +test_description='git p4 error encoding + +This test checks that the import process handles inconsistent text +encoding in p4 error messages without failing' + +. ./lib-git-p4.sh + +############################### +## SECTION REPEATED IN t9835 ## +############################### + +# These tests require Perforce with non-unicode setup. +out=$(2>&1 P4CHARSET=utf8 p4 client -o) +if test $? -eq 0 +then + skip_all="skipping git p4 error encoding tests; Perforce is setup with unicode" + test_done +fi + +# These tests are specific to Python 3. Write a custom script that executes +# git-p4 directly with the Python 3 interpreter to ensure that we use that +# version even if Git was compiled with Python 2. +python_target_binary=$(which python3) +if test -n "$python_target_binary" +then + mkdir temp_python + PATH="$(pwd)/temp_python:$PATH" + export PATH + + write_script temp_python/git-p4-python3 <<-EOF + exec "$python_target_binary" "$(git --exec-path)/git-p4" "\$@" + EOF +fi + +git p4-python3 >err +if ! grep 'valid commands' err +then + skip_all="skipping python3 git p4 tests; python3 not available" + test_done +fi + +test_expect_success 'start p4d' ' + start_p4d +' + +test_expect_success 'see if Perforce error with characters not convertable to utf-8 will be processed correctly' ' + test_when_finished cleanup_git && + $python_target_binary "$TEST_DIRECTORY"/t9837/git-p4-error-python3.py "$TEST_DIRECTORY" +' + +test_done diff --git a/t/t9837/git-p4-error-python3.py b/t/t9837/git-p4-error-python3.py new file mode 100644 index 00000000000000..fb65aee386e2e3 --- /dev/null +++ b/t/t9837/git-p4-error-python3.py @@ -0,0 +1,15 @@ +import os +import sys +from importlib.machinery import SourceFileLoader + +def main(): + if len(sys.argv[1:]) != 1: + print("Expected test directory name") + + gitp4_path = sys.argv[1] + "/../git-p4.py" + gitp4 = SourceFileLoader("gitp4", gitp4_path).load_module() + gitp4.p4CmdList(["edit", b'\xFEfile']) + +if __name__ == '__main__': + main() +