Skip to content

Commit

Permalink
curl_multibyte: support Windows paths longer than MAX_PATH
Browse files Browse the repository at this point in the history
- Add a helper function for the Windows file wrapper functions that will
  normalize a long path (or a filename in a long path) and add the
  prefix `\\?\` so that Windows will access the file.

Prior to this change if a filename (when normalized internally by
Windows to its full path) or a path was longer than MAX_PATH (260) then
Windows would not open the path, unless it was already normalized by the
user and had the `\\?\` prefix prepended.

The `\\?\` prefix could not be passed to file:// so for example
something like file://c:/foo/bar/filename255chars could not be opened
prior to this change.

Some code in tool_doswin will need to be modified as well in order to
fully remove the MAX_PATH (aka PATH_MAX) limitation.

Ref: #8361
Ref: #13512
Ref: https://learn.microsoft.com/en-us/dotnet/standard/io/file-path-formats
Ref: https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation

Closes #xxxx
  • Loading branch information
jay committed May 3, 2024
1 parent 6e4b7ab commit f7e60b5
Showing 1 changed file with 160 additions and 19 deletions.
179 changes: 160 additions & 19 deletions lib/curl_multibyte.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,103 @@ char *curlx_convert_wchar_to_UTF8(const wchar_t *str_w)

#if defined(USE_WIN32_LARGE_FILES) || defined(USE_WIN32_SMALL_FILES)


/* Fix excessive paths (paths that exceed the MAX_PATH limit of 260,
 * terminating NUL included).
 *
 * This is a helper function to fix paths that would exceed the MAX_PATH
 * limitation check done by Windows APIs. It does so by normalizing the passed
 * in filename or path 'in' to its full canonical path, and if that path is
 * longer than MAX_PATH then setting 'out' to an extended-length prefix + that
 * full path.
 *
 *   c:\longpath            --->    \\?\c:\longpath
 *   \\.\c:\longpath        --->    \\?\c:\longpath
 *   \\server\c$\longpath   --->    \\?\UNC\server\c$\longpath
 *
 * For example 'in' filename255chars in current directory C:\foo\bar is
 * fixed as \\?\C:\foo\bar\filename255chars for 'out' which will tell Windows
 * it's ok to access that filename even though the actual full path is longer
 * than MAX_PATH.
 *
 * Paths that need no fixing are left alone: an 'in' that already starts with
 * \\?\ and a full path that fits in MAX_PATH both make this function return
 * false so that the caller keeps using its original string.
 *
 * For non-Unicode builds this function may fail sometimes because only the
 * Unicode versions of some Windows API functions can access paths longer than
 * MAX_PATH, for example GetFullPathNameW which is used in this function. When
 * the full path is then converted from Unicode to multibyte that fails if any
 * directories in the path contain characters not in the current codepage.
 *
 * Returns true if 'out' was set to a newly allocated string, which the caller
 * must release with free(). Returns false (and sets 'out' to NULL) if the
 * path was not changed or if an error occurred.
 */
static bool fix_excessive_path(const TCHAR *in, TCHAR **out)
{
  /* MS documented "approximate" upper limit for an extended-length path */
  const size_t max_path_len = 32767;
  size_t needed, written;
  size_t skip;               /* leading chars of 'full' the prefix replaces */
  size_t prefix_len;
  const wchar_t *prefix;
  const wchar_t *in_w;
  wchar_t *full = NULL;      /* normalized full path, not yet prefixed */
  wchar_t *fbuf = NULL;      /* prefix + normalized full path */

#ifndef _UNICODE
  wchar_t *ibuf = NULL;
  char *obuf = NULL;
#endif

  *out = NULL;

#ifndef _UNICODE
  /* convert multibyte input to unicode */
  needed = mbstowcs(NULL, in, 0);
  if(needed == (size_t)-1 || needed >= max_path_len)
    goto cleanup;
  ++needed; /* for NUL */
  ibuf = malloc(needed * sizeof(wchar_t));
  if(!ibuf)
    goto cleanup;
  written = mbstowcs(ibuf, in, needed);
  if(written == (size_t)-1 || written >= needed)
    goto cleanup;
  in_w = ibuf;
#else
  in_w = in;
#endif

  /* skip paths the user already passed in extended-length form; adding a
     second prefix would produce an invalid path */
  if(!wcsncmp(in_w, L"\\\\?\\", 4))
    goto cleanup;

  /* get full unicode path of the unicode filename or path. When the buffer
     is too small the return value is the required size including NUL. */
  needed = (size_t)GetFullPathNameW(in_w, 0, NULL, NULL);
  if(!needed || needed > max_path_len)
    goto cleanup;

  /* skip paths that are not excessive and do not need modification */
  if(needed <= MAX_PATH)
    goto cleanup;

  full = malloc(needed * sizeof(wchar_t));
  if(!full)
    goto cleanup;
  /* on success the return value is the length excluding NUL */
  written = (size_t)GetFullPathNameW(in_w, (DWORD)needed, full, NULL);
  if(!written || written >= needed)
    goto cleanup;

  /* pick the extended-length prefix that matches the kind of full path */
  if(!wcsncmp(full, L"\\\\?\\", 4) || !wcsncmp(full, L"\\\\.\\", 4)) {
    /* \\?\ or \\.\ namespace: replace the first four characters */
    prefix = L"\\\\?\\";
    prefix_len = 4;
    skip = 4;
  }
  else if(!wcsncmp(full, L"\\\\", 2)) {
    /* UNC path: \\server\share becomes \\?\UNC\server\share */
    prefix = L"\\\\?\\UNC\\";
    prefix_len = 8;
    skip = 2;
  }
  else {
    /* plain drive path such as C:\dir\file */
    prefix = L"\\\\?\\";
    prefix_len = 4;
    skip = 0;
  }

  needed = prefix_len + (written - skip) + 1; /* +1 for NUL */
  if(needed > max_path_len)
    goto cleanup;
  fbuf = malloc(needed * sizeof(wchar_t));
  if(!fbuf)
    goto cleanup;
  wcsncpy(fbuf, prefix, prefix_len);
  /* the count includes the terminating NUL of 'full' */
  wcsncpy(fbuf + prefix_len, full + skip, written - skip + 1);

#ifndef _UNICODE
  /* convert unicode full path to multibyte output */
  needed = wcstombs(NULL, fbuf, 0);
  if(needed == (size_t)-1 || needed >= max_path_len)
    goto cleanup;
  ++needed; /* for NUL */
  obuf = malloc(needed);
  if(!obuf)
    goto cleanup;
  written = wcstombs(obuf, fbuf, needed);
  if(written == (size_t)-1 || written >= needed)
    goto cleanup;
  *out = obuf;
  obuf = NULL; /* ownership passed to the caller */
#else
  *out = fbuf;
  fbuf = NULL; /* ownership passed to the caller */
#endif

cleanup:
  free(full);
  free(fbuf);
#ifndef _UNICODE
  free(ibuf);
  free(obuf);
#endif
  return (*out ? true : false);
}

int curlx_win32_open(const char *filename, int oflag, ...)
{
int pmode = 0;
int result = -1;
TCHAR *fixed = NULL;
const TCHAR *target = NULL;

#ifdef _UNICODE
int result = -1;
wchar_t *filename_w = curlx_convert_UTF8_to_wchar(filename);
#endif

Expand All @@ -105,75 +196,125 @@ int curlx_win32_open(const char *filename, int oflag, ...)

#ifdef _UNICODE
if(filename_w) {
result = _wopen(filename_w, oflag, pmode);
if(fix_excessive_path(filename_w, &fixed))
target = fixed;
else
target = filename_w;
result = _wopen(target, oflag, pmode);
curlx_unicodefree(filename_w);
}
else
errno = EINVAL;
return result;
#else
return (_open)(filename, oflag, pmode);
if(fix_excessive_path(filename, &fixed))
target = fixed;
else
target = filename;
result = (_open)(target, oflag, pmode);
#endif

free(fixed);
return result;
}

FILE *curlx_win32_fopen(const char *filename, const char *mode)
{
#ifdef _UNICODE
FILE *result = NULL;
TCHAR *fixed = NULL;
const TCHAR *target = NULL;

#ifdef _UNICODE
wchar_t *filename_w = curlx_convert_UTF8_to_wchar(filename);
wchar_t *mode_w = curlx_convert_UTF8_to_wchar(mode);
if(filename_w && mode_w)
result = _wfopen(filename_w, mode_w);
if(filename_w && mode_w) {
if(fix_excessive_path(filename_w, &fixed))
target = fixed;
else
target = filename_w;
result = _wfopen(target, mode_w);
}
else
errno = EINVAL;
curlx_unicodefree(filename_w);
curlx_unicodefree(mode_w);
return result;
#else
return (fopen)(filename, mode);
if(fix_excessive_path(filename, &fixed))
target = fixed;
else
target = filename;
result = (fopen)(target, mode);
#endif

free(fixed);
return result;
}

/* stat() replacement for Windows that can query paths longer than MAX_PATH.
 *
 * In Unicode builds 'path' is converted with curlx_convert_UTF8_to_wchar()
 * and queried with _wstat() or _wstati64(); if the conversion fails errno is
 * set to EINVAL and -1 is returned. Non-Unicode builds use _stat() or
 * _stati64(). The small or 64-bit variant is selected by
 * USE_WIN32_SMALL_FILES. In both build types the path is first offered to
 * fix_excessive_path() and the fixed path is used when one is returned.
 *
 * Returns the result of the underlying stat call, or -1 if the path could not
 * be converted.
 */
int curlx_win32_stat(const char *path, struct_stat *buffer)
{
  int result = -1;
  TCHAR *fixed = NULL;
  const TCHAR *target = NULL;

#ifdef _UNICODE
  wchar_t *path_w = curlx_convert_UTF8_to_wchar(path);
  if(path_w) {
    if(fix_excessive_path(path_w, &fixed))
      target = fixed;
    else
      target = path_w;
#if defined(USE_WIN32_SMALL_FILES)
    result = _wstat(target, buffer);
#else
    result = _wstati64(target, buffer);
#endif
    curlx_unicodefree(path_w);
  }
  else
    errno = EINVAL;
#else
  if(fix_excessive_path(path, &fixed))
    target = fixed;
  else
    target = path;
#if defined(USE_WIN32_SMALL_FILES)
  result = _stat(target, buffer);
#else
  result = _stati64(target, buffer);
#endif
#endif

  /* single exit so the fixed path is released on every code path */
  free(fixed);
  return result;
}

int curlx_win32_access(const char *path, int mode)
{
#if defined(_UNICODE)
int result = -1;
TCHAR *fixed = NULL;
const TCHAR *target = NULL;

#if defined(_UNICODE)
wchar_t *path_w = curlx_convert_UTF8_to_wchar(path);
if(path_w) {
result = _waccess(path_w, mode);
if(fix_excessive_path(path_w, &fixed))
target = fixed;
else
target = path_w;
result = _waccess(target, mode);
curlx_unicodefree(path_w);
}
else
errno = EINVAL;
return result;
#else
return _access(path, mode);
if(fix_excessive_path(path, &fixed))
target = fixed;
else
target = path;
result = _access(target, mode);
#endif

free(fixed);
return result;
}

#endif /* USE_WIN32_LARGE_FILES || USE_WIN32_SMALL_FILES */

0 comments on commit f7e60b5

Please sign in to comment.