From 0b4396f0688f6c82ada83eab68b1c8dd3669d8bf Mon Sep 17 00:00:00 2001 From: Yang Zhao Date: Fri, 13 Dec 2019 15:52:35 -0800 Subject: [PATCH 01/14] git-p4: make python2.7 the oldest supported version Python2.6 and earlier have been end-of-life'd for many years now, and we actually already use 2.7-only features in the code. Make the version check reflect current realities. This also removes the need to explicitly define CalledProcessError if it's not available. Signed-off-by: Yang Zhao Signed-off-by: Junio C Hamano --- git-p4.py | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/git-p4.py b/git-p4.py index 60c73b6a374ccc..37777bb9fdf2a6 100755 --- a/git-p4.py +++ b/git-p4.py @@ -8,9 +8,8 @@ # License: MIT # import sys -if sys.hexversion < 0x02040000: - # The limiter is the subprocess module - sys.stderr.write("git-p4: requires Python 2.4 or later.\n") +if sys.version_info.major < 3 and sys.version_info.minor < 7: + sys.stderr.write("git-p4: requires Python 2.7 or later.\n") sys.exit(1) import os import optparse @@ -43,21 +42,6 @@ bytes = str basestring = basestring -try: - from subprocess import CalledProcessError -except ImportError: - # from python2.7:subprocess.py - # Exception classes used by this module. - class CalledProcessError(Exception): - """This exception is raised when a process run by check_call() returns - a non-zero exit status. The exit status will be stored in the - returncode attribute.""" - def __init__(self, returncode, cmd): - self.returncode = returncode - self.cmd = cmd - def __str__(self): - return "Command '%s' returned non-zero exit status %d" % (self.cmd, self.returncode) - verbose = False # Only labels/tags matching this will be imported/exported From 484d09c303f271bdbdc2b7ed597e1d8f3959ce0f Mon Sep 17 00:00:00 2001 From: Ben Keene Date: Fri, 13 Dec 2019 15:52:36 -0800 Subject: [PATCH 02/14] git-p4: change the expansion test from basestring to list Python 3 handles strings differently than Python 2.7. Since Python 2 is reaching it's end of life, a series of changes are being submitted to enable python 3.5 and following support. The current code fails basic tests under python 3.5. Some codepaths can represent a command line the program internally prepares to execute either as a single string (i.e. each token properly quoted, concatenated with $IFS) or as a list of argv[] elements, and there are 9 places where we say "if X is isinstance(_, basestring), then do this thing to handle X as a command line in a single string; if not, X is a command line in a list form". This does not work well with Python 3, as there is no basestring (everything is Unicode now), and even with Python 2, it was not an ideal way to tell the two cases apart, because an internally formed command line could have been in a single Unicode string. Flip the check to say "if X is not a list, then handle X as a command line in a single string; otherwise treat it as a command line in a list form". This will get rid of references to 'basestring', to migrate the code ready for Python 3. Thanks-to: Junio C Hamano Signed-off-by: Ben Keene Signed-off-by: Yang Zhao Signed-off-by: Junio C Hamano --- git-p4.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/git-p4.py b/git-p4.py index 37777bb9fdf2a6..2f177fb43b6dbd 100755 --- a/git-p4.py +++ b/git-p4.py @@ -89,7 +89,7 @@ def p4_build_cmd(cmd): # Provide a way to not pass this option by setting git-p4.retries to 0 real_cmd += ["-r", str(retries)] - if isinstance(cmd,basestring): + if not isinstance(cmd, list): real_cmd = ' '.join(real_cmd) + ' ' + cmd else: real_cmd += cmd @@ -155,7 +155,7 @@ def write_pipe(c, stdin): if verbose: sys.stderr.write('Writing pipe: %s\n' % str(c)) - expand = isinstance(c,basestring) + expand = not isinstance(c, list) p = subprocess.Popen(c, stdin=subprocess.PIPE, shell=expand) pipe = p.stdin val = pipe.write(stdin) @@ -177,7 +177,7 @@ def read_pipe_full(c): if verbose: sys.stderr.write('Reading pipe: %s\n' % str(c)) - expand = isinstance(c,basestring) + expand = not isinstance(c, list) p = subprocess.Popen(c, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=expand) (out, err) = p.communicate() return (p.returncode, out, err) @@ -213,7 +213,7 @@ def read_pipe_lines(c): if verbose: sys.stderr.write('Reading pipe: %s\n' % str(c)) - expand = isinstance(c, basestring) + expand = not isinstance(c, list) p = subprocess.Popen(c, stdout=subprocess.PIPE, shell=expand) pipe = p.stdout val = pipe.readlines() @@ -256,7 +256,7 @@ def p4_has_move_command(): return True def system(cmd, ignore_error=False): - expand = isinstance(cmd,basestring) + expand = not isinstance(cmd, list) if verbose: sys.stderr.write("executing %s\n" % str(cmd)) retcode = subprocess.call(cmd, shell=expand) @@ -268,7 +268,7 @@ def system(cmd, ignore_error=False): def p4_system(cmd): """Specifically invoke p4 as the system command. """ real_cmd = p4_build_cmd(cmd) - expand = isinstance(real_cmd, basestring) + expand = not isinstance(real_cmd, list) retcode = subprocess.call(real_cmd, shell=expand) if retcode: raise CalledProcessError(retcode, real_cmd) @@ -506,7 +506,7 @@ def getP4OpenedType(file): # Return the set of all p4 labels def getP4Labels(depotPaths): labels = set() - if isinstance(depotPaths,basestring): + if not isinstance(depotPaths, list): depotPaths = [depotPaths] for l in p4CmdList(["labels"] + ["%s..." % p for p in depotPaths]): @@ -593,7 +593,7 @@ def isModeExecChanged(src_mode, dst_mode): def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False, errors_as_exceptions=False): - if isinstance(cmd,basestring): + if not isinstance(cmd, list): cmd = "-G " + cmd expand = True else: @@ -610,7 +610,7 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False, stdin_file = None if stdin is not None: stdin_file = tempfile.TemporaryFile(prefix='p4-stdin', mode=stdin_mode) - if isinstance(stdin,basestring): + if not isinstance(stdin, list): stdin_file.write(stdin) else: for i in stdin: From 1f8b46d0a4fdcb6fea566405cfc5422d39c36d52 Mon Sep 17 00:00:00 2001 From: Yang Zhao Date: Fri, 13 Dec 2019 15:52:37 -0800 Subject: [PATCH 03/14] git-p4: remove string type aliasing Now that python2.7 is the minimum required version and we no longer use the basestring type, it is not necessary to use type aliasing to ensure python3 compatibility. Signed-off-by: Yang Zhao Signed-off-by: Junio C Hamano --- git-p4.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/git-p4.py b/git-p4.py index 2f177fb43b6dbd..153aff16f39eb8 100755 --- a/git-p4.py +++ b/git-p4.py @@ -26,22 +26,6 @@ import ctypes import errno -# support basestring in python3 -try: - unicode = unicode -except NameError: - # 'unicode' is undefined, must be Python 3 - str = str - unicode = str - bytes = bytes - basestring = (str,bytes) -else: - # 'unicode' exists, must be Python 2 - str = str - unicode = unicode - bytes = str - basestring = basestring - verbose = False # Only labels/tags matching this will be imported/exported From 6cec21a82f437184c69cce2b8fe6011d02acfa06 Mon Sep 17 00:00:00 2001 From: Yang Zhao Date: Fri, 13 Dec 2019 15:52:38 -0800 Subject: [PATCH 04/14] git-p4: encode/decode communication with p4 for python3 The marshalled dict in the response given on STDOUT by p4 uses `str` for keys and string values. When run using python3, these values are deserialized as `bytes`, leading to a whole host of problems as the rest of the code assumes `str` is used throughout. This patch changes the deserialization behaviour such that, as much as possible, text output from p4 is decoded to native unicode strings. Exceptions are made for the field `data` as it is usually arbitrary binary data. `depotFile[0-9]*`, `path`, and `clientFile` are also exempt as they contain path strings not encoded with UTF-8, and must survive round-trip back to p4. Conversely, text data being piped to p4 must always be encoded when running under python3. encode_text_stream() and decode_text_stream() were added to make these transformations more convenient. Signed-off-by: Yang Zhao Signed-off-by: Junio C Hamano --- git-p4.py | 59 +++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 46 insertions(+), 13 deletions(-) diff --git a/git-p4.py b/git-p4.py index 153aff16f39eb8..ca891e3d5dc63b 100755 --- a/git-p4.py +++ b/git-p4.py @@ -135,6 +135,21 @@ def die(msg): sys.stderr.write(msg + "\n") sys.exit(1) +# We need different encoding/decoding strategies for text data being passed +# around in pipes depending on python version +if bytes is not str: + # For python3, always encode and decode as appropriate + def decode_text_stream(s): + return s.decode() if isinstance(s, bytes) else s + def encode_text_stream(s): + return s.encode() if isinstance(s, str) else s +else: + # For python2.7, pass read strings as-is, but also allow writing unicode + def decode_text_stream(s): + return s + def encode_text_stream(s): + return s.encode('utf_8') if isinstance(s, unicode) else s + def write_pipe(c, stdin): if verbose: sys.stderr.write('Writing pipe: %s\n' % str(c)) @@ -151,6 +166,8 @@ def write_pipe(c, stdin): def p4_write_pipe(c, stdin): real_cmd = p4_build_cmd(c) + if bytes is not str and isinstance(stdin, str): + stdin = encode_text_stream(stdin) return write_pipe(real_cmd, stdin) def read_pipe_full(c): @@ -164,7 +181,7 @@ def read_pipe_full(c): expand = not isinstance(c, list) p = subprocess.Popen(c, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=expand) (out, err) = p.communicate() - return (p.returncode, out, err) + return (p.returncode, out, decode_text_stream(err)) def read_pipe(c, ignore_error=False): """ Read output from command. Returns the output text on @@ -187,11 +204,11 @@ def read_pipe_text(c): if retcode != 0: return None else: - return out.rstrip() + return decode_text_stream(out).rstrip() -def p4_read_pipe(c, ignore_error=False): +def p4_read_pipe(c, ignore_error=False, raw=False): real_cmd = p4_build_cmd(c) - return read_pipe(real_cmd, ignore_error) + return read_pipe(real_cmd, ignore_error, raw=raw) def read_pipe_lines(c): if verbose: @@ -200,7 +217,7 @@ def read_pipe_lines(c): expand = not isinstance(c, list) p = subprocess.Popen(c, stdout=subprocess.PIPE, shell=expand) pipe = p.stdout - val = pipe.readlines() + val = [decode_text_stream(line) for line in pipe.readlines()] if pipe.close() or p.wait(): die('Command failed: %s' % str(c)) @@ -231,6 +248,7 @@ def p4_has_move_command(): cmd = p4_build_cmd(["move", "-k", "@from", "@to"]) p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) (out, err) = p.communicate() + err = decode_text_stream(err) # return code will be 1 in either case if err.find("Invalid option") >= 0: return False @@ -611,6 +629,20 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False, try: while True: entry = marshal.load(p4.stdout) + if bytes is not str: + # Decode unmarshalled dict to use str keys and values, except for: + # - `data` which may contain arbitrary binary data + # - `depotFile[0-9]*`, `path`, or `clientFile` which may contain non-UTF8 encoded text + decoded_entry = {} + for key, value in entry.items(): + key = key.decode() + if isinstance(value, bytes) and not (key in ('data', 'path', 'clientFile') or key.startswith('depotFile')): + value = value.decode() + decoded_entry[key] = value + # Parse out data if it's an error response + if decoded_entry.get('code') == 'error' and 'data' in decoded_entry: + decoded_entry['data'] = decoded_entry['data'].decode() + entry = decoded_entry if skip_info: if 'code' in entry and entry['code'] == 'info': continue @@ -828,6 +860,7 @@ def branch_exists(branch): cmd = [ "git", "rev-parse", "--symbolic", "--verify", branch ] p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, _ = p.communicate() + out = decode_text_stream(out) if p.returncode: return False # expect exactly one line of output: the branch name @@ -1971,7 +2004,7 @@ def applyCommit(self, id): tmpFile = os.fdopen(handle, "w+b") if self.isWindows: submitTemplate = submitTemplate.replace("\n", "\r\n") - tmpFile.write(submitTemplate) + tmpFile.write(encode_text_stream(submitTemplate)) tmpFile.close() if self.prepare_p4_only: @@ -2018,7 +2051,7 @@ def applyCommit(self, id): if self.edit_template(fileName): # read the edited message and submit tmpFile = open(fileName, "rb") - message = tmpFile.read() + message = decode_text_stream(tmpFile.read()) tmpFile.close() if self.isWindows: message = message.replace("\r\n", "\n") @@ -2707,7 +2740,7 @@ def splitFilesIntoBranches(self, commit): return branches def writeToGitStream(self, gitMode, relPath, contents): - self.gitStream.write('M %s inline %s\n' % (gitMode, relPath)) + self.gitStream.write(encode_text_stream(u'M {} inline {}\n'.format(gitMode, relPath))) self.gitStream.write('data %d\n' % sum(len(d) for d in contents)) for d in contents: self.gitStream.write(d) @@ -2748,7 +2781,7 @@ def streamOneP4File(self, file, contents): git_mode = "120000" # p4 print on a symlink sometimes contains "target\n"; # if it does, remove the newline - data = ''.join(contents) + data = ''.join(decode_text_stream(c) for c in contents) if not data: # Some version of p4 allowed creating a symlink that pointed # to nothing. This causes p4 errors when checking out such @@ -2802,7 +2835,7 @@ def streamOneP4File(self, file, contents): pattern = p4_keywords_regexp_for_type(type_base, type_mods) if pattern: regexp = re.compile(pattern, re.VERBOSE) - text = ''.join(contents) + text = ''.join(decode_text_stream(c) for c in contents) text = regexp.sub(r'$\1$', text) contents = [ text ] @@ -2817,7 +2850,7 @@ def streamOneP4Deletion(self, file): if verbose: sys.stdout.write("delete %s\n" % relPath) sys.stdout.flush() - self.gitStream.write("D %s\n" % relPath) + self.gitStream.write(encode_text_stream(u'D {}\n'.format(relPath))) if self.largeFileSystem and self.largeFileSystem.isLargeFile(relPath): self.largeFileSystem.removeLargeFile(relPath) @@ -2917,9 +2950,9 @@ def streamP4FilesCbSelf(entry): if 'shelved_cl' in f: # Handle shelved CLs using the "p4 print file@=N" syntax to print # the contents - fileArg = '%s@=%d' % (f['path'], f['shelved_cl']) + fileArg = f['path'] + encode_text_stream('@={}'.format(f['shelved_cl'])) else: - fileArg = '%s#%s' % (f['path'], f['rev']) + fileArg = f['path'] + encode_text_stream('#{}'.format(f['rev'])) fileArgs.append(fileArg) From 86dca24b7be6491fb8d4b7a499ccf4a43776d1ab Mon Sep 17 00:00:00 2001 From: Yang Zhao Date: Fri, 13 Dec 2019 15:52:39 -0800 Subject: [PATCH 05/14] git-p4: encode/decode communication with git for python3 Under python3, calls to write() on the stream to `git fast-import` must be encoded. This patch wraps the IO object such that this encoding is done transparently. Conversely, any text data read from subprocesses must also be decoded before running through the rest of the pipeline. Signed-off-by: Yang Zhao Reviewed-by: Ben Keene Signed-off-by: Junio C Hamano --- git-p4.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/git-p4.py b/git-p4.py index ca891e3d5dc63b..d62fb05989f4d9 100755 --- a/git-p4.py +++ b/git-p4.py @@ -183,10 +183,12 @@ def read_pipe_full(c): (out, err) = p.communicate() return (p.returncode, out, decode_text_stream(err)) -def read_pipe(c, ignore_error=False): +def read_pipe(c, ignore_error=False, raw=False): """ Read output from command. Returns the output text on success. On failure, terminates execution, unless ignore_error is True, when it returns an empty string. + + If raw is True, do not attempt to decode output text. """ (retcode, out, err) = read_pipe_full(c) if retcode != 0: @@ -194,6 +196,8 @@ def read_pipe(c, ignore_error=False): out = "" else: die('Command failed: %s\nError: %s' % (str(c), err)) + if not raw: + out = decode_text_stream(out) return out def read_pipe_text(c): @@ -220,7 +224,6 @@ def read_pipe_lines(c): val = [decode_text_stream(line) for line in pipe.readlines()] if pipe.close() or p.wait(): die('Command failed: %s' % str(c)) - return val def p4_read_pipe_lines(c): @@ -616,7 +619,8 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False, stdin_file.write(stdin) else: for i in stdin: - stdin_file.write(i + '\n') + stdin_file.write(encode_text_stream(i)) + stdin_file.write(b'\n') stdin_file.flush() stdin_file.seek(0) @@ -1245,7 +1249,7 @@ def generatePointer(self, contentFile): ['git', 'lfs', 'pointer', '--file=' + contentFile], stdout=subprocess.PIPE ) - pointerFile = pointerProcess.stdout.read() + pointerFile = decode_text_stream(pointerProcess.stdout.read()) if pointerProcess.wait(): os.remove(contentFile) die('git-lfs pointer command failed. Did you install the extension?') @@ -3538,6 +3542,15 @@ def openStreams(self): self.gitStream = self.importProcess.stdin self.gitError = self.importProcess.stderr + if bytes is not str: + # Wrap gitStream.write() so that it can be called using `str` arguments + def make_encoded_write(write): + def encoded_write(s): + return write(s.encode() if isinstance(s, str) else s) + return encoded_write + + self.gitStream.write = make_encoded_write(self.gitStream.write) + def closeStreams(self): self.gitStream.close() if self.importProcess.wait() != 0: From d38208a297e76bbfbfa8e485632c217aaafa9486 Mon Sep 17 00:00:00 2001 From: Yang Zhao Date: Fri, 13 Dec 2019 15:52:40 -0800 Subject: [PATCH 06/14] git-p4: convert path to unicode before processing them P4 allows essentially arbitrary encoding for path data while we would perfer to be dealing only with unicode strings. Since path data need to survive round-trip back to p4, this patch implements the general policy that we store path data as-is, but decode them to unicode before doing any non-trivial processing. A new `decode_path()` method is provided that generally does the correct conversion, taking into account `git-p4.pathEncoding` configuration. For python2.7, path strings will be left as-is if it only contains ASCII characters. For python3, decoding is always done so that we have str objects. Signed-off-by: Yang Zhao Reviewed-by: Ben Keene Signed-off-by: Junio C Hamano --- git-p4.py | 69 +++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 25 deletions(-) diff --git a/git-p4.py b/git-p4.py index d62fb05989f4d9..d43f373b2d06d4 100755 --- a/git-p4.py +++ b/git-p4.py @@ -150,6 +150,21 @@ def decode_text_stream(s): def encode_text_stream(s): return s.encode('utf_8') if isinstance(s, unicode) else s +def decode_path(path): + """Decode a given string (bytes or otherwise) using configured path encoding options + """ + encoding = gitConfig('git-p4.pathEncoding') or 'utf_8' + if bytes is not str: + return path.decode(encoding, errors='replace') if isinstance(path, bytes) else path + else: + try: + path.decode('ascii') + except: + path = path.decode(encoding, errors='replace') + if verbose: + print('Path with non-ASCII characters detected. Used {} to decode: {}'.format(encoding, path)) + return path + def write_pipe(c, stdin): if verbose: sys.stderr.write('Writing pipe: %s\n' % str(c)) @@ -697,7 +712,8 @@ def p4Where(depotPath): if "depotFile" in entry: # Search for the base client side depot path, as long as it starts with the branch's P4 path. # The base path always ends with "/...". - if entry["depotFile"].find(depotPath) == 0 and entry["depotFile"][-4:] == "/...": + entry_path = decode_path(entry['depotFile']) + if entry_path.find(depotPath) == 0 and entry_path[-4:] == "/...": output = entry break elif "data" in entry: @@ -712,11 +728,11 @@ def p4Where(depotPath): return "" clientPath = "" if "path" in output: - clientPath = output.get("path") + clientPath = decode_path(output['path']) elif "data" in output: data = output.get("data") - lastSpace = data.rfind(" ") - clientPath = data[lastSpace + 1:] + lastSpace = data.rfind(b" ") + clientPath = decode_path(data[lastSpace + 1:]) if clientPath.endswith("..."): clientPath = clientPath[:-3] @@ -2484,7 +2500,7 @@ def append(self, view_line): def convert_client_path(self, clientFile): # chop off //client/ part to make it relative - if not clientFile.startswith(self.client_prefix): + if not decode_path(clientFile).startswith(self.client_prefix): die("No prefix '%s' on clientFile '%s'" % (self.client_prefix, clientFile)) return clientFile[len(self.client_prefix):] @@ -2493,7 +2509,7 @@ def update_client_spec_path_cache(self, files): """ Caching file paths by "p4 where" batch query """ # List depot file paths exclude that already cached - fileArgs = [f['path'] for f in files if f['path'] not in self.client_spec_path_cache] + fileArgs = [f['path'] for f in files if decode_path(f['path']) not in self.client_spec_path_cache] if len(fileArgs) == 0: return # All files in cache @@ -2508,16 +2524,18 @@ def update_client_spec_path_cache(self, files): if "unmap" in res: # it will list all of them, but only one not unmap-ped continue + depot_path = decode_path(res['depotFile']) if gitConfigBool("core.ignorecase"): - res['depotFile'] = res['depotFile'].lower() - self.client_spec_path_cache[res['depotFile']] = self.convert_client_path(res["clientFile"]) + depot_path = depot_path.lower() + self.client_spec_path_cache[depot_path] = self.convert_client_path(res["clientFile"]) # not found files or unmap files set to "" for depotFile in fileArgs: + depotFile = decode_path(depotFile) if gitConfigBool("core.ignorecase"): depotFile = depotFile.lower() if depotFile not in self.client_spec_path_cache: - self.client_spec_path_cache[depotFile] = "" + self.client_spec_path_cache[depotFile] = b'' def map_in_client(self, depot_path): """Return the relative location in the client where this @@ -2635,7 +2653,7 @@ def isPathWanted(self, path): elif path.lower() == p.lower(): return False for p in self.depotPaths: - if p4PathStartsWith(path, p): + if p4PathStartsWith(path, decode_path(p)): return True return False @@ -2644,7 +2662,7 @@ def extractFilesFromCommit(self, commit, shelved=False, shelved_cl = 0): fnum = 0 while "depotFile%s" % fnum in commit: path = commit["depotFile%s" % fnum] - found = self.isPathWanted(path) + found = self.isPathWanted(decode_path(path)) if not found: fnum = fnum + 1 continue @@ -2678,7 +2696,7 @@ def stripRepoPath(self, path, prefixes): if self.useClientSpec: # branch detection moves files up a level (the branch name) # from what client spec interpretation gives - path = self.clientSpecDirs.map_in_client(path) + path = decode_path(self.clientSpecDirs.map_in_client(path)) if self.detectBranches: for b in self.knownBranches: if p4PathStartsWith(path, b + "/"): @@ -2712,14 +2730,15 @@ def splitFilesIntoBranches(self, commit): branches = {} fnum = 0 while "depotFile%s" % fnum in commit: - path = commit["depotFile%s" % fnum] + raw_path = commit["depotFile%s" % fnum] + path = decode_path(raw_path) found = self.isPathWanted(path) if not found: fnum = fnum + 1 continue file = {} - file["path"] = path + file["path"] = raw_path file["rev"] = commit["rev%s" % fnum] file["action"] = commit["action%s" % fnum] file["type"] = commit["type%s" % fnum] @@ -2728,7 +2747,7 @@ def splitFilesIntoBranches(self, commit): # start with the full relative path where this file would # go in a p4 client if self.useClientSpec: - relPath = self.clientSpecDirs.map_in_client(path) + relPath = decode_path(self.clientSpecDirs.map_in_client(path)) else: relPath = self.stripRepoPath(path, self.depotPaths) @@ -2766,14 +2785,15 @@ def encodeWithUTF8(self, path): # - helper for streamP4Files def streamOneP4File(self, file, contents): - relPath = self.stripRepoPath(file['depotFile'], self.branchPrefixes) - relPath = self.encodeWithUTF8(relPath) + file_path = file['depotFile'] + relPath = self.stripRepoPath(decode_path(file_path), self.branchPrefixes) + if verbose: if 'fileSize' in self.stream_file: size = int(self.stream_file['fileSize']) else: size = 0 # deleted files don't get a fileSize apparently - sys.stdout.write('\r%s --> %s (%i MB)\n' % (file['depotFile'], relPath, size/1024/1024)) + sys.stdout.write('\r%s --> %s (%i MB)\n' % (file_path, relPath, size/1024/1024)) sys.stdout.flush() (type_base, type_mods) = split_p4_type(file["type"]) @@ -2791,7 +2811,7 @@ def streamOneP4File(self, file, contents): # to nothing. This causes p4 errors when checking out such # a change, and errors here too. Work around it by ignoring # the bad symlink; hopefully a future change fixes it. - print("\nIgnoring empty symlink in %s" % file['depotFile']) + print("\nIgnoring empty symlink in %s" % file_path) return elif data[-1] == '\n': contents = [data[:-1]] @@ -2810,7 +2830,7 @@ def streamOneP4File(self, file, contents): # just the native "NT" type. # try: - text = p4_read_pipe(['print', '-q', '-o', '-', '%s@%s' % (file['depotFile'], file['change'])]) + text = p4_read_pipe(['print', '-q', '-o', '-', '%s@%s' % (decode_path(file['depotFile']), file['change'])], raw=True) except Exception as e: if 'Translation of file content failed' in str(e): type_base = 'binary' @@ -2818,7 +2838,7 @@ def streamOneP4File(self, file, contents): raise e else: if p4_version_string().find('/NT') >= 0: - text = text.replace('\r\n', '\n') + text = text.replace(b'\r\n', b'\n') contents = [ text ] if type_base == "apple": @@ -2849,8 +2869,7 @@ def streamOneP4File(self, file, contents): self.writeToGitStream(git_mode, relPath, contents) def streamOneP4Deletion(self, file): - relPath = self.stripRepoPath(file['path'], self.branchPrefixes) - relPath = self.encodeWithUTF8(relPath) + relPath = self.stripRepoPath(decode_path(file['path']), self.branchPrefixes) if verbose: sys.stdout.write("delete %s\n" % relPath) sys.stdout.flush() @@ -3037,8 +3056,8 @@ def commit(self, details, files, branch, parent = "", allow_empty=False): if self.clientSpecDirs: self.clientSpecDirs.update_client_spec_path_cache(files) - files = [f for f in files - if self.inClientSpec(f['path']) and self.hasBranchPrefix(f['path'])] + files = [f for (f, path) in ((f, decode_path(f['path'])) for f in files) + if self.inClientSpec(path) and self.hasBranchPrefix(path)] if gitConfigBool('git-p4.keepEmptyCommits'): allow_empty = True From 5a5577d808668a7859c5009ddfa18b097368ba2c Mon Sep 17 00:00:00 2001 From: Yang Zhao Date: Fri, 13 Dec 2019 15:52:41 -0800 Subject: [PATCH 07/14] git-p4: open .gitp4-usercache.txt in text mode Opening .gitp4-usercache.txt in text mode makes python 3 happy without explicitly adding encoding and decoding. Signed-off-by: Yang Zhao Reviewed-by: Ben Keene Signed-off-by: Junio C Hamano --- git-p4.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/git-p4.py b/git-p4.py index d43f373b2d06d4..abcda60eee01e1 100755 --- a/git-p4.py +++ b/git-p4.py @@ -1395,14 +1395,14 @@ def getUserMapFromPerforceServer(self): for (key, val) in self.users.items(): s += "%s\t%s\n" % (key.expandtabs(1), val.expandtabs(1)) - open(self.getUserCacheFilename(), "wb").write(s) + open(self.getUserCacheFilename(), 'w').write(s) self.userMapFromPerforceServer = True def loadUserMapFromCache(self): self.users = {} self.userMapFromPerforceServer = False try: - cache = open(self.getUserCacheFilename(), "rb") + cache = open(self.getUserCacheFilename(), 'r') lines = cache.readlines() cache.close() for line in lines: From 50da1e73933386ef95499d2f7753184ba82ce2b3 Mon Sep 17 00:00:00 2001 From: Yang Zhao Date: Fri, 13 Dec 2019 15:52:42 -0800 Subject: [PATCH 08/14] git-p4: use marshal format version 2 when sending to p4 p4 does not appear to understand marshal format version 3 and above. Version 2 was the latest supported by python-2.7. Signed-off-by: Yang Zhao Reviewed-by: Ben Keene Signed-off-by: Junio C Hamano --- git-p4.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/git-p4.py b/git-p4.py index abcda60eee01e1..c7170c9ae69e7c 100755 --- a/git-p4.py +++ b/git-p4.py @@ -1679,7 +1679,8 @@ def modifyChangelistUser(self, changelist, newUser): c = changes[0] if c['User'] == newUser: return # nothing to do c['User'] = newUser - input = marshal.dumps(c) + # p4 does not understand format version 3 and above + input = marshal.dumps(c, 2) result = p4CmdList("change -f -i", stdin=input) for r in result: From 4294d741cc13e1f5533efaa693e4479c62b28873 Mon Sep 17 00:00:00 2001 From: Yang Zhao Date: Fri, 13 Dec 2019 15:52:43 -0800 Subject: [PATCH 09/14] git-p4: fix freezing while waiting for fast-import progress As part of its importing process, git-p4 sends a `checkpoint` followed immediately by `progress` to fast-import to force synchronization. Due to buffering, it is possible for the `progress` command to not be flushed before git-p4 begins to wait for the corresponding response. This causes the script to freeze completely, and is consistently observable at least on python-3.6.9. Make sure this command sequence is completely flushed before waiting. Signed-off-by: Yang Zhao Reviewed-by: Ben Keene Signed-off-by: Junio C Hamano --- git-p4.py | 1 + 1 file changed, 1 insertion(+) diff --git a/git-p4.py b/git-p4.py index c7170c9ae69e7c..68f9f4fdc6b8e0 100755 --- a/git-p4.py +++ b/git-p4.py @@ -2641,6 +2641,7 @@ def __init__(self): def checkpoint(self): self.gitStream.write("checkpoint\n\n") self.gitStream.write("progress checkpoint\n\n") + self.gitStream.flush() out = self.gitOutput.readline() if self.verbose: print("checkpoint finished: " + out) From a6b1306735f1a6450f22f562275c994d03324672 Mon Sep 17 00:00:00 2001 From: Yang Zhao Date: Fri, 13 Dec 2019 15:52:44 -0800 Subject: [PATCH 10/14] git-p4: use functools.reduce instead of reduce For python3, reduce() has been moved to functools.reduce(). This is also available in python2.7. Signed-off-by: Yang Zhao Signed-off-by: Junio C Hamano --- git-p4.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/git-p4.py b/git-p4.py index 68f9f4fdc6b8e0..7deee1b93918d8 100755 --- a/git-p4.py +++ b/git-p4.py @@ -13,6 +13,7 @@ sys.exit(1) import os import optparse +import functools import marshal import subprocess import tempfile @@ -1158,7 +1159,7 @@ def pushFile(self, localLargeFile): assert False, "Method 'pushFile' required in " + self.__class__.__name__ def hasLargeFileExtension(self, relPath): - return reduce( + return functools.reduce( lambda a, b: a or b, [relPath.endswith('.' + e) for e in gitConfigList('git-p4.largeFileExtensions')], False From 2e2aa8d9032ccdfdecaab51c60c5bada517f60bc Mon Sep 17 00:00:00 2001 From: Yang Zhao Date: Fri, 13 Dec 2019 15:52:45 -0800 Subject: [PATCH 11/14] git-p4: use dict.items() iteration for python3 compatibility Python3 uses dict.items() instead of .iteritems() to provide iteratoration over dict. Although items() is technically less efficient for python2.7 (allocates a new list instead of simply iterating), the amount of data involved is very small and the penalty negligible. Signed-off-by: Yang Zhao Reviewed-by: Ben Keene Signed-off-by: Junio C Hamano --- git-p4.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git-p4.py b/git-p4.py index 7deee1b93918d8..b7e31d47380be4 100755 --- a/git-p4.py +++ b/git-p4.py @@ -1745,7 +1745,7 @@ def prepareSubmitTemplate(self, changelist=None): break if not change_entry: die('Failed to decode output of p4 change -o') - for key, value in change_entry.iteritems(): + for key, value in change_entry.items(): if key.startswith('File'): if 'depot-paths' in settings: if not [p for p in settings['depot-paths'] From ce425eb4e16e5038ffc322cbafc80d641b0ad5eb Mon Sep 17 00:00:00 2001 From: Yang Zhao Date: Fri, 13 Dec 2019 15:52:46 -0800 Subject: [PATCH 12/14] git-p4: simplify regex pattern generation for parsing diff-tree It is not clear why a generator was used to create the regex used to parse git-diff-tree output; I assume an early implementation required it, but is not part of the mainline change. Simply use a lazily initialized global instead. Signed-off-by: Yang Zhao Reviewed-by: Ben Keene Signed-off-by: Junio C Hamano --- git-p4.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/git-p4.py b/git-p4.py index b7e31d47380be4..3af8df9f834e78 100755 --- a/git-p4.py +++ b/git-p4.py @@ -544,12 +544,7 @@ def getGitTags(): gitTags.add(tag) return gitTags -def diffTreePattern(): - # This is a simple generator for the diff tree regex pattern. This could be - # a class variable if this and parseDiffTreeEntry were a part of a class. - pattern = re.compile(':(\d+) (\d+) (\w+) (\w+) ([A-Z])(\d+)?\t(.*?)((\t(.*))|$)') - while True: - yield pattern +_diff_tree_pattern = None def parseDiffTreeEntry(entry): """Parses a single diff tree entry into its component elements. @@ -570,7 +565,11 @@ def parseDiffTreeEntry(entry): If the pattern is not matched, None is returned.""" - match = diffTreePattern().next().match(entry) + global _diff_tree_pattern + if not _diff_tree_pattern: + _diff_tree_pattern = re.compile(':(\d+) (\d+) (\w+) (\w+) ([A-Z])(\d+)?\t(.*?)((\t(.*))|$)') + + match = _diff_tree_pattern.match(entry) if match: return { 'src_mode': match.group(1), From 7575f4fdecf22cf232975625ec53add3f2b962e4 Mon Sep 17 00:00:00 2001 From: Yang Zhao Date: Fri, 13 Dec 2019 15:52:47 -0800 Subject: [PATCH 13/14] git-p4: use python3's input() everywhere Python3 deprecates raw_input() from 2.7 and replaced it with input(). Since we do not need 2.7's input() semantics, `raw_input()` is aliased to `input()` for easy forward compatability. Signed-off-by: Yang Zhao Signed-off-by: Junio C Hamano --- git-p4.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/git-p4.py b/git-p4.py index 3af8df9f834e78..17f72f43090552 100755 --- a/git-p4.py +++ b/git-p4.py @@ -27,6 +27,16 @@ import ctypes import errno +# On python2.7 where raw_input() and input() are both availble, +# we want raw_input's semantics, but aliased to input for python3 +# compatibility +# support basestring in python3 +try: + if raw_input and input: + input = raw_input +except: + pass + verbose = False # Only labels/tags matching this will be imported/exported @@ -1801,7 +1811,7 @@ def edit_template(self, template_file): return True while True: - response = raw_input("Submit template unchanged. Submit anyway? [y]es, [n]o (skip this patch) ") + response = input("Submit template unchanged. Submit anyway? [y]es, [n]o (skip this patch) ") if response == 'y': return True if response == 'n': @@ -2372,7 +2382,7 @@ def run(self, args): # prompt for what to do, or use the option/variable if self.conflict_behavior == "ask": print("What do you want to do?") - response = raw_input("[s]kip this commit but apply" + response = input("[s]kip this commit but apply" " the rest, or [q]uit? ") if not response: continue From 6bb40ed20a34a1c2f49efaef927afb63c4a81d41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Thu, 23 Jan 2020 18:56:45 +0100 Subject: [PATCH 14/14] ci: use python3 in linux-gcc and osx-gcc and python2 elsewhere MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Python2 reached end of life, and we have been preparing our Python scripts to work with Python3. 'git p4', the main in-tree user of Python, has just received a number of compatibility updates. Our other notable Python script 'contrib/svn-fe/svnrdump_sim.py' is only used in 't9020-remote-svn.sh', and is apparently already compatible with both Python2 and 3. Our CI jobs currently only use Python2. We want to make sure that these Python scripts do indeed work with Python3, and we also want to make sure that these scripts keep working with Python2 as well, for the sake of some older LTS/Enterprise setups. Therefore, pick two jobs and use Python3 there, while leaving other jobs to still stick to Python2 for now. Signed-off-by: SZEDER Gábor Signed-off-by: Junio C Hamano --- ci/lib.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ci/lib.sh b/ci/lib.sh index a90d0dc0fd2ae3..c3a8cd2104641e 100755 --- a/ci/lib.sh +++ b/ci/lib.sh @@ -162,6 +162,9 @@ linux-clang|linux-gcc) if [ "$jobname" = linux-gcc ] then export CC=gcc-8 + MAKEFLAGS="$MAKEFLAGS PYTHON_PATH=$(which python3)" + else + MAKEFLAGS="$MAKEFLAGS PYTHON_PATH=$(which python2)" fi export GIT_TEST_HTTPD=true @@ -182,6 +185,9 @@ osx-clang|osx-gcc) if [ "$jobname" = osx-gcc ] then export CC=gcc-9 + MAKEFLAGS="$MAKEFLAGS PYTHON_PATH=$(which python3)" + else + MAKEFLAGS="$MAKEFLAGS PYTHON_PATH=$(which python2)" fi # t9810 occasionally fails on Travis CI OS X