From 53602669f58ddbeb3294d7196b3320aaaed22728 Mon Sep 17 00:00:00 2001 From: isaacs Date: Wed, 4 Aug 2021 15:48:21 -0700 Subject: [PATCH] fix: normalize paths on Windows systems This change uses / as the One True Path Separator, as the gods of POSIX intended in their divine wisdom. On windows, \ characters are converted to /, everywhere and in depth. However, on posix systems, \ is a valid filename character, and is not treated specially. So, instead of splitting on `/[/\\]/`, we can now just split on `'/'` to get a set of path parts. This does mean that archives with entries containing \ will extract differently on Windows systems than on correct systems. However, this is also the behavior of both bsdtar and gnutar, so it seems appropriate to follow suit. Additionally, dirCache pruning is now done case-insensitively. On case-sensitive systems, this potentially results in a few extra lstat calls. However, on case-insensitive systems, it prevents incorrect cache hits. --- lib/mkdir.js | 32 ++++++++++++-------- lib/normalize-windows-path.js | 8 +++++ lib/pack.js | 7 +++-- lib/path-reservations.js | 6 ++-- lib/unpack.js | 55 +++++++++++++++++----------------- lib/write-entry.js | 10 ++++--- test/normalize-windows-path.js | 26 ++++++++++++++++ 7 files changed, 95 insertions(+), 49 deletions(-) create mode 100644 lib/normalize-windows-path.js create mode 100644 test/normalize-windows-path.js diff --git a/lib/mkdir.js b/lib/mkdir.js index aed398fc..f75ccaa9 100644 --- a/lib/mkdir.js +++ b/lib/mkdir.js @@ -8,6 +8,7 @@ const mkdirp = require('mkdirp') const fs = require('fs') const path = require('path') const chownr = require('chownr') +const normPath = require('./normalize-windows-path.js') class SymlinkError extends Error { constructor (symlink, path) { @@ -33,7 +34,11 @@ class CwdError extends Error { } } +const cGet = (cache, key) => cache.get(normPath(key)) +const cSet = (cache, key, val) => cache.set(normPath(key), val) + module.exports = (dir, opt, cb) => { + dir = normPath(dir) // if there's any overlap between mask and mode, // then we'll need an explicit chmod const umask = opt.umask @@ -49,13 +54,13 @@ module.exports = (dir, opt, cb) => { const preserve = opt.preserve const unlink = opt.unlink const cache = opt.cache - const cwd = opt.cwd + const cwd = normPath(opt.cwd) const done = (er, created) => { if (er) cb(er) else { - cache.set(dir, true) + cSet(cache, dir, true) if (created && doChown) chownr(created, uid, gid, er => done(er)) else if (needChmod) @@ -65,7 +70,7 @@ module.exports = (dir, opt, cb) => { } } - if (cache && cache.get(dir) === true) + if (cache && cGet(cache, dir) === true) return done() if (dir === cwd) { @@ -80,7 +85,7 @@ module.exports = (dir, opt, cb) => { return mkdirp(dir, {mode}).then(made => done(null, made), done) const sub = path.relative(cwd, dir) - const parts = sub.split(/\/|\\/) + const parts = sub.split('/') mkdir_(cwd, parts, mode, cache, unlink, cwd, null, done) } @@ -89,7 +94,7 @@ const mkdir_ = (base, parts, mode, cache, unlink, cwd, created, cb) => { return cb(null, created) const p = parts.shift() const part = base + '/' + p - if (cache.get(part)) + if (cGet(cache, part)) return mkdir_(part, parts, mode, cache, unlink, cwd, created, cb) fs.mkdir(part, mode, onmkdir(part, parts, mode, cache, unlink, cwd, created, cb)) } @@ -123,6 +128,7 @@ const onmkdir = (part, parts, mode, cache, unlink, cwd, created, cb) => er => { } module.exports.sync = (dir, opt) => { + dir = normPath(dir) // if there's any overlap between mask and mode, // then we'll need an explicit chmod const umask = opt.umask @@ -138,17 +144,17 @@ module.exports.sync = (dir, opt) => { const preserve = opt.preserve const unlink = opt.unlink const cache = opt.cache - const cwd = opt.cwd + const cwd = normPath(opt.cwd) const done = (created) => { - cache.set(dir, true) + cSet(cache, dir, true) if (created && doChown) chownr.sync(created, uid, gid) if (needChmod) fs.chmodSync(dir, mode) } - if (cache && cache.get(dir) === true) + if (cache && cGet(cache, dir) === true) return done() if (dir === cwd) { @@ -170,18 +176,18 @@ module.exports.sync = (dir, opt) => { return done(mkdirp.sync(dir, mode)) const sub = path.relative(cwd, dir) - const parts = sub.split(/\/|\\/) + const parts = sub.split('/') let created = null for (let p = parts.shift(), part = cwd; p && (part += '/' + p); p = parts.shift()) { - if (cache.get(part)) + if (cGet(cache, part)) continue try { fs.mkdirSync(part, mode) created = created || part - cache.set(part, true) + cSet(cache, part, true) } catch (er) { if (er.path && path.dirname(er.path) === cwd && (er.code === 'ENOTDIR' || er.code === 'ENOENT')) @@ -189,13 +195,13 @@ module.exports.sync = (dir, opt) => { const st = fs.lstatSync(part) if (st.isDirectory()) { - cache.set(part, true) + cSet(cache, part, true) continue } else if (unlink) { fs.unlinkSync(part) fs.mkdirSync(part, mode) created = created || part - cache.set(part, true) + cSet(cache, part, true) continue } else if (st.isSymbolicLink()) return new SymlinkError(part, part + '/' + parts.join('/')) diff --git a/lib/normalize-windows-path.js b/lib/normalize-windows-path.js new file mode 100644 index 00000000..8e3c30a0 --- /dev/null +++ b/lib/normalize-windows-path.js @@ -0,0 +1,8 @@ +// on windows, either \ or / are valid directory separators. +// on unix, \ is a valid character in filenames. +// so, on windows, and only on windows, we replace all \ chars with /, +// so that we can use / as our one and only directory separator char. + +const platform = process.env.TESTING_TAR_FAKE_PLATFORM || process.platform +module.exports = platform !== 'win32' ? p => p + : p => p.replace(/\\/g, '/') diff --git a/lib/pack.js b/lib/pack.js index df7f9f1c..9522c10b 100644 --- a/lib/pack.js +++ b/lib/pack.js @@ -54,6 +54,7 @@ const ONDRAIN = Symbol('ondrain') const fs = require('fs') const path = require('path') const warner = require('./warn-mixin.js') +const normPath = require('./normalize-windows-path.js') const Pack = warner(class Pack extends MiniPass { constructor (opt) { @@ -66,7 +67,7 @@ const Pack = warner(class Pack extends MiniPass { this.preservePaths = !!opt.preservePaths this.strict = !!opt.strict this.noPax = !!opt.noPax - this.prefix = (opt.prefix || '').replace(/(\\|\/)+$/, '') + this.prefix = normPath(opt.prefix || '') this.linkCache = opt.linkCache || new Map() this.statCache = opt.statCache || new Map() this.readdirCache = opt.readdirCache || new Map() @@ -133,7 +134,7 @@ const Pack = warner(class Pack extends MiniPass { } [ADDTARENTRY] (p) { - const absolute = path.resolve(this.cwd, p.path) + const absolute = normPath(path.resolve(this.cwd, p.path)) // in this case, we don't have to wait for the stat if (!this.filter(p.path, p)) p.resume() @@ -149,7 +150,7 @@ const Pack = warner(class Pack extends MiniPass { } [ADDFSENTRY] (p) { - const absolute = path.resolve(this.cwd, p) + const absolute = normPath(path.resolve(this.cwd, p)) this[QUEUE].push(new PackJob(p, absolute)) this[PROCESS]() } diff --git a/lib/path-reservations.js b/lib/path-reservations.js index c0a16b0a..1929ac9c 100644 --- a/lib/path-reservations.js +++ b/lib/path-reservations.js @@ -7,6 +7,7 @@ // while still allowing maximal safe parallelization. const assert = require('assert') +const normPath = require('./normalize-windows-path.js') module.exports = () => { // path => [function or Set] @@ -20,8 +21,9 @@ module.exports = () => { // return a set of parent dirs for a given path const { join } = require('path') const getDirs = path => - join(path).split(/[\\/]/).slice(0, -1).reduce((set, path) => - set.length ? set.concat(join(set[set.length - 1], path)) : [path], []) + normPath(join(path)).split('/').slice(0, -1).reduce((set, path) => + set.length ? set.concat(normPath(join(set[set.length - 1], path))) + : [path], []) // functions currently running const running = new Set() diff --git a/lib/unpack.js b/lib/unpack.js index 282e04f9..38a7bbaf 100644 --- a/lib/unpack.js +++ b/lib/unpack.js @@ -15,6 +15,7 @@ const mkdir = require('./mkdir.js') const wc = require('./winchars.js') const pathReservations = require('./path-reservations.js') const stripAbsolutePath = require('./strip-absolute-path.js') +const normPath = require('./normalize-windows-path.js') const ONENTRY = Symbol('onEntry') const CHECKFS = Symbol('checkFs') @@ -91,6 +92,17 @@ const uint32 = (a, b, c) => : b === b >>> 0 ? b : c +const pruneCache = (cache, abs) => { + // clear the cache if it's a case-insensitive match, since we can't + // know if the current file system is case-sensitive or not. + abs = normPath(abs).toLowerCase() + for (const path of cache.keys()) { + const plower = path.toLowerCase() + if (plower === abs || plower.toLowerCase().indexOf(abs + '/') === 0) + cache.delete(path) + } +} + class Unpack extends Parser { constructor (opt) { if (!opt) @@ -168,7 +180,7 @@ class Unpack extends Parser { // links, and removes symlink directories rather than erroring this.unlink = !!opt.unlink - this.cwd = path.resolve(opt.cwd || process.cwd()) + this.cwd = normPath(path.resolve(opt.cwd || process.cwd())) this.strip = +opt.strip || 0 // if we're not chmodding, then we don't need the process umask this.processUmask = opt.noChmod ? 0 : process.umask() @@ -201,7 +213,7 @@ class Unpack extends Parser { [CHECKPATH] (entry) { if (this.strip) { - const parts = entry.path.split(/\/|\\/) + const parts = normPath(entry.path).split('/') if (parts.length < this.strip) return false entry.path = parts.slice(this.strip).join('/') @@ -209,15 +221,15 @@ class Unpack extends Parser { return false if (entry.type === 'Link') { - const linkparts = entry.linkpath.split(/\/|\\/) + const linkparts = normPath(entry.linkpath).split('/') if (linkparts.length >= this.strip) entry.linkpath = linkparts.slice(this.strip).join('/') } } if (!this.preservePaths) { - const p = entry.path - if (p.match(/(^|\/|\\)\.\.(\\|\/|$)/)) { + const p = normPath(entry.path) + if (p.split('/').includes('..')) { this.warn('TAR_ENTRY_ERROR', `path contains '..'`, { entry, path: p, @@ -245,9 +257,9 @@ class Unpack extends Parser { } if (path.isAbsolute(entry.path)) - entry.absolute = entry.path + entry.absolute = normPath(entry.path) else - entry.absolute = path.resolve(this.cwd, entry.path) + entry.absolute = normPath(path.resolve(this.cwd, entry.path)) return true } @@ -293,7 +305,7 @@ class Unpack extends Parser { } [MKDIR] (dir, mode, cb) { - mkdir(dir, { + mkdir(normPath(dir), { uid: this.uid, gid: this.gid, processUid: this.processUid, @@ -451,7 +463,8 @@ class Unpack extends Parser { } [HARDLINK] (entry, done) { - this[LINK](entry, path.resolve(this.cwd, entry.linkpath), 'link', done) + const linkpath = normPath(path.resolve(this.cwd, entry.linkpath)) + this[LINK](entry, linkpath, 'link', done) } [PEND] () { @@ -493,14 +506,8 @@ class Unpack extends Parser { // then that means we are about to delete the directory we created // previously, and it is no longer going to be a directory, and neither // is any of its children. - if (entry.type !== 'Directory') { - for (const path of this.dirCache.keys()) { - if (path === entry.absolute || - path.indexOf(entry.absolute + '/') === 0 || - path.indexOf(entry.absolute + '\\') === 0) - this.dirCache.delete(path) - } - } + if (entry.type !== 'Directory') + pruneCache(this.dirCache, entry.absolute) this[MKDIR](path.dirname(entry.absolute), this.dmode, er => { if (er) { @@ -556,7 +563,7 @@ class Unpack extends Parser { } [LINK] (entry, linkpath, link, done) { - // XXX: get the type ('file' or 'dir') for windows + // XXX: get the type ('symlink' or 'junction') for windows fs[link](linkpath, entry.absolute, er => { if (er) this[ONERROR](er, entry) @@ -571,14 +578,8 @@ class Unpack extends Parser { class UnpackSync extends Unpack { [CHECKFS] (entry) { - if (entry.type !== 'Directory') { - for (const path of this.dirCache.keys()) { - if (path === entry.absolute || - path.indexOf(entry.absolute + '/') === 0 || - path.indexOf(entry.absolute + '\\') === 0) - this.dirCache.delete(path) - } - } + if (entry.type !== 'Directory') + pruneCache(this.dirCache, entry.absolute) const er = this[MKDIR](path.dirname(entry.absolute), this.dmode, neverCalled) if (er) @@ -700,7 +701,7 @@ class UnpackSync extends Unpack { [MKDIR] (dir, mode) { try { - return mkdir.sync(dir, { + return mkdir.sync(normPath(dir), { uid: this.uid, gid: this.gid, processUid: this.processUid, diff --git a/lib/write-entry.js b/lib/write-entry.js index 0457f3ae..598fe8ee 100644 --- a/lib/write-entry.js +++ b/lib/write-entry.js @@ -4,12 +4,14 @@ const Pax = require('./pax.js') const Header = require('./header.js') const fs = require('fs') const path = require('path') +const normPath = require('./normalize-windows-path.js') +const stripSlash = require('./strip-trailing-slashes.js') const prefixPath = (path, prefix) => { if (!prefix) return path - path = path.replace(/^\.([/\\]|$)/, '') - return prefix + '/' + path + path = normPath(path).replace(/^\.(\/|$)/, '') + return stripSlash(prefix) + '/' + path } const maxReadSize = 16 * 1024 * 1024 @@ -43,7 +45,7 @@ const WriteEntry = warner(class WriteEntry extends MiniPass { super(opt) if (typeof p !== 'string') throw new TypeError('path is required') - this.path = p + this.path = normPath(p) // suppress atime, ctime, uid, gid, uname, gname this.portable = !!opt.portable // until node has builtin pwnam functions, this'll have to do @@ -87,7 +89,7 @@ const WriteEntry = warner(class WriteEntry extends MiniPass { p = p.replace(/\\/g, '/') } - this.absolute = opt.absolute || path.resolve(this.cwd, p) + this.absolute = normPath(opt.absolute || path.resolve(this.cwd, p)) if (this.path === '') this.path = './' diff --git a/test/normalize-windows-path.js b/test/normalize-windows-path.js new file mode 100644 index 00000000..3081fa0a --- /dev/null +++ b/test/normalize-windows-path.js @@ -0,0 +1,26 @@ +const t = require('tap') + +const realPlatform = process.platform +const fakePlatform = realPlatform === 'win32' ? 'posix' : 'win32' + +t.test('posix', t => { + if (realPlatform === 'win32') + process.env.TESTING_TAR_FAKE_PLATFORM = fakePlatform + else + delete process.env.TESTING_TAR_FAKE_PLATFORM + const normPath = t.mock('../lib/normalize-windows-path.js') + t.equal(normPath('/some/path/back\\slashes'), '/some/path/back\\slashes') + t.equal(normPath('c:\\foo\\bar'), 'c:\\foo\\bar') + t.end() +}) + +t.test('win32', t => { + if (realPlatform !== 'win32') + process.env.TESTING_TAR_FAKE_PLATFORM = fakePlatform + else + delete process.env.TESTING_TAR_FAKE_PLATFORM + const normPath = t.mock('../lib/normalize-windows-path.js') + t.equal(normPath('/some/path/back\\slashes'), '/some/path/back/slashes') + t.equal(normPath('c:\\foo\\bar'), 'c:/foo/bar') + t.end() +})