Skip to content

Commit

Permalink
git-p4: convert path to unicode before processing them
Browse files Browse the repository at this point in the history
P4 allows essentially arbitrary encoding for path data while we would
perfer to be dealing only with unicode strings.  Since path data need to
survive round-trip back to p4, this patch implements the general policy
that we store path data as-is, but decode them to unicode before doing
any non-trivial processing.

A new `decode_path()` method is provided that generally does the correct
conversion, taking into account `git-p4.pathEncoding` configuration.

For python2.7, path strings will be left as-is if it only contains ASCII
characters.

For python3, decoding is always done so that we have str objects.

Signed-off-by: Yang Zhao <yang.zhao@skyboxlabs.com>
Reviewed-by: Ben Keene <seraphire@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
  • Loading branch information
yangskyboxlabs authored and gitster committed Jan 15, 2020
1 parent 86dca24 commit d38208a
Showing 1 changed file with 44 additions and 25 deletions.
69 changes: 44 additions & 25 deletions git-p4.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,21 @@ def decode_text_stream(s):
def encode_text_stream(s):
return s.encode('utf_8') if isinstance(s, unicode) else s

def decode_path(path):
"""Decode a given string (bytes or otherwise) using configured path encoding options
"""
encoding = gitConfig('git-p4.pathEncoding') or 'utf_8'
if bytes is not str:
return path.decode(encoding, errors='replace') if isinstance(path, bytes) else path
else:
try:
path.decode('ascii')
except:
path = path.decode(encoding, errors='replace')
if verbose:
print('Path with non-ASCII characters detected. Used {} to decode: {}'.format(encoding, path))
return path

def write_pipe(c, stdin):
if verbose:
sys.stderr.write('Writing pipe: %s\n' % str(c))
Expand Down Expand Up @@ -697,7 +712,8 @@ def p4Where(depotPath):
if "depotFile" in entry:
# Search for the base client side depot path, as long as it starts with the branch's P4 path.
# The base path always ends with "/...".
if entry["depotFile"].find(depotPath) == 0 and entry["depotFile"][-4:] == "/...":
entry_path = decode_path(entry['depotFile'])
if entry_path.find(depotPath) == 0 and entry_path[-4:] == "/...":
output = entry
break
elif "data" in entry:
Expand All @@ -712,11 +728,11 @@ def p4Where(depotPath):
return ""
clientPath = ""
if "path" in output:
clientPath = output.get("path")
clientPath = decode_path(output['path'])
elif "data" in output:
data = output.get("data")
lastSpace = data.rfind(" ")
clientPath = data[lastSpace + 1:]
lastSpace = data.rfind(b" ")
clientPath = decode_path(data[lastSpace + 1:])

if clientPath.endswith("..."):
clientPath = clientPath[:-3]
Expand Down Expand Up @@ -2484,7 +2500,7 @@ def append(self, view_line):

def convert_client_path(self, clientFile):
# chop off //client/ part to make it relative
if not clientFile.startswith(self.client_prefix):
if not decode_path(clientFile).startswith(self.client_prefix):
die("No prefix '%s' on clientFile '%s'" %
(self.client_prefix, clientFile))
return clientFile[len(self.client_prefix):]
Expand All @@ -2493,7 +2509,7 @@ def update_client_spec_path_cache(self, files):
""" Caching file paths by "p4 where" batch query """

# List depot file paths exclude that already cached
fileArgs = [f['path'] for f in files if f['path'] not in self.client_spec_path_cache]
fileArgs = [f['path'] for f in files if decode_path(f['path']) not in self.client_spec_path_cache]

if len(fileArgs) == 0:
return # All files in cache
Expand All @@ -2508,16 +2524,18 @@ def update_client_spec_path_cache(self, files):
if "unmap" in res:
# it will list all of them, but only one not unmap-ped
continue
depot_path = decode_path(res['depotFile'])
if gitConfigBool("core.ignorecase"):
res['depotFile'] = res['depotFile'].lower()
self.client_spec_path_cache[res['depotFile']] = self.convert_client_path(res["clientFile"])
depot_path = depot_path.lower()
self.client_spec_path_cache[depot_path] = self.convert_client_path(res["clientFile"])

# not found files or unmap files set to ""
for depotFile in fileArgs:
depotFile = decode_path(depotFile)
if gitConfigBool("core.ignorecase"):
depotFile = depotFile.lower()
if depotFile not in self.client_spec_path_cache:
self.client_spec_path_cache[depotFile] = ""
self.client_spec_path_cache[depotFile] = b''

def map_in_client(self, depot_path):
"""Return the relative location in the client where this
Expand Down Expand Up @@ -2635,7 +2653,7 @@ def isPathWanted(self, path):
elif path.lower() == p.lower():
return False
for p in self.depotPaths:
if p4PathStartsWith(path, p):
if p4PathStartsWith(path, decode_path(p)):
return True
return False

Expand All @@ -2644,7 +2662,7 @@ def extractFilesFromCommit(self, commit, shelved=False, shelved_cl = 0):
fnum = 0
while "depotFile%s" % fnum in commit:
path = commit["depotFile%s" % fnum]
found = self.isPathWanted(path)
found = self.isPathWanted(decode_path(path))
if not found:
fnum = fnum + 1
continue
Expand Down Expand Up @@ -2678,7 +2696,7 @@ def stripRepoPath(self, path, prefixes):
if self.useClientSpec:
# branch detection moves files up a level (the branch name)
# from what client spec interpretation gives
path = self.clientSpecDirs.map_in_client(path)
path = decode_path(self.clientSpecDirs.map_in_client(path))
if self.detectBranches:
for b in self.knownBranches:
if p4PathStartsWith(path, b + "/"):
Expand Down Expand Up @@ -2712,14 +2730,15 @@ def splitFilesIntoBranches(self, commit):
branches = {}
fnum = 0
while "depotFile%s" % fnum in commit:
path = commit["depotFile%s" % fnum]
raw_path = commit["depotFile%s" % fnum]
path = decode_path(raw_path)
found = self.isPathWanted(path)
if not found:
fnum = fnum + 1
continue

file = {}
file["path"] = path
file["path"] = raw_path
file["rev"] = commit["rev%s" % fnum]
file["action"] = commit["action%s" % fnum]
file["type"] = commit["type%s" % fnum]
Expand All @@ -2728,7 +2747,7 @@ def splitFilesIntoBranches(self, commit):
# start with the full relative path where this file would
# go in a p4 client
if self.useClientSpec:
relPath = self.clientSpecDirs.map_in_client(path)
relPath = decode_path(self.clientSpecDirs.map_in_client(path))
else:
relPath = self.stripRepoPath(path, self.depotPaths)

Expand Down Expand Up @@ -2766,14 +2785,15 @@ def encodeWithUTF8(self, path):
# - helper for streamP4Files

def streamOneP4File(self, file, contents):
relPath = self.stripRepoPath(file['depotFile'], self.branchPrefixes)
relPath = self.encodeWithUTF8(relPath)
file_path = file['depotFile']
relPath = self.stripRepoPath(decode_path(file_path), self.branchPrefixes)

if verbose:
if 'fileSize' in self.stream_file:
size = int(self.stream_file['fileSize'])
else:
size = 0 # deleted files don't get a fileSize apparently
sys.stdout.write('\r%s --> %s (%i MB)\n' % (file['depotFile'], relPath, size/1024/1024))
sys.stdout.write('\r%s --> %s (%i MB)\n' % (file_path, relPath, size/1024/1024))
sys.stdout.flush()

(type_base, type_mods) = split_p4_type(file["type"])
Expand All @@ -2791,7 +2811,7 @@ def streamOneP4File(self, file, contents):
# to nothing. This causes p4 errors when checking out such
# a change, and errors here too. Work around it by ignoring
# the bad symlink; hopefully a future change fixes it.
print("\nIgnoring empty symlink in %s" % file['depotFile'])
print("\nIgnoring empty symlink in %s" % file_path)
return
elif data[-1] == '\n':
contents = [data[:-1]]
Expand All @@ -2810,15 +2830,15 @@ def streamOneP4File(self, file, contents):
# just the native "NT" type.
#
try:
text = p4_read_pipe(['print', '-q', '-o', '-', '%s@%s' % (file['depotFile'], file['change'])])
text = p4_read_pipe(['print', '-q', '-o', '-', '%s@%s' % (decode_path(file['depotFile']), file['change'])], raw=True)
except Exception as e:
if 'Translation of file content failed' in str(e):
type_base = 'binary'
else:
raise e
else:
if p4_version_string().find('/NT') >= 0:
text = text.replace('\r\n', '\n')
text = text.replace(b'\r\n', b'\n')
contents = [ text ]

if type_base == "apple":
Expand Down Expand Up @@ -2849,8 +2869,7 @@ def streamOneP4File(self, file, contents):
self.writeToGitStream(git_mode, relPath, contents)

def streamOneP4Deletion(self, file):
relPath = self.stripRepoPath(file['path'], self.branchPrefixes)
relPath = self.encodeWithUTF8(relPath)
relPath = self.stripRepoPath(decode_path(file['path']), self.branchPrefixes)
if verbose:
sys.stdout.write("delete %s\n" % relPath)
sys.stdout.flush()
Expand Down Expand Up @@ -3037,8 +3056,8 @@ def commit(self, details, files, branch, parent = "", allow_empty=False):
if self.clientSpecDirs:
self.clientSpecDirs.update_client_spec_path_cache(files)

files = [f for f in files
if self.inClientSpec(f['path']) and self.hasBranchPrefix(f['path'])]
files = [f for (f, path) in ((f, decode_path(f['path'])) for f in files)
if self.inClientSpec(path) and self.hasBranchPrefix(path)]

if gitConfigBool('git-p4.keepEmptyCommits'):
allow_empty = True
Expand Down

0 comments on commit d38208a

Please sign in to comment.