Skip to content

Commit eddedbf

Browse files
committed
Fix new variant of the vulnerability in CVE-2023-35936.
Guilhem Moulin noticed that the fix to CVE-2023-35936 was incomplete. An attacker could get around it by double-encoding the malicious extension to create or override arbitrary files.

    $ echo '![](data://image/png;base64,cHJpbnQgImhlbGxvIgo=;.lua+%252f%252e%252e%252f%252e%252e%252fb%252elua)' >b.md
    $ .cabal/bin/pandoc b.md --extract-media=bar
    <p><img src="bar/2a0eaa89f43fada3e6c577beea4f2f8f53ab6a1d.lua+%2f%2e%2e%2f%2e%2e%2fb%2elua" /></p>
    $ cat b.lua
    print "hello"
    $ find bar
    bar/
    bar/2a0eaa89f43fada3e6c577beea4f2f8f53ab6a1d.lua+

This commit adds a test case for this more complex attack and fixes the vulnerability. (The fix is quite simple: if the URL-unescaped filename or extension contains a '%', we just use the sha1 hash of the contents as the canonical name, just as we do if the filename contains '..'.)
1 parent db2594a commit eddedbf

File tree

3 files changed

+17
-4
lines changed

3 files changed

+17
-4
lines changed

src/Text/Pandoc/Class/IO.hs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,8 @@ writeMedia :: (PandocMonad m, MonadIO m)
224224
-> m ()
225225
writeMedia dir (fp, _mt, bs) = do
226226
-- we normalize to get proper path separators for the platform
227+
-- we unescape URI encoding, but given how insertMedia
228+
-- is written, we shouldn't have any % in a canonical media name...
227229
let fullpath = normalise $ dir </> unEscapeString fp
228230
liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath)
229231
report $ Extracting (T.pack fullpath)

src/Text/Pandoc/MediaBag.hs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,16 +90,17 @@ insertMedia fp mbMime contents (MediaBag mediamap) =
9090
&& Windows.isRelative fp''
9191
&& isNothing uri
9292
&& not (".." `isInfixOf` fp'')
93+
&& '%' `notElem` fp''
9394
then fp''
94-
else showDigest (sha1 contents) <> "." <> ext
95+
else showDigest (sha1 contents) <> ext
9596
fallback = case takeExtension fp'' of
9697
".gz" -> getMimeTypeDef $ dropExtension fp''
9798
_ -> getMimeTypeDef fp''
9899
mt = fromMaybe fallback mbMime
99100
path = maybe fp'' (unEscapeString . uriPath) uri
100101
ext = case takeExtension path of
101-
'.':e -> e
102-
_ -> maybe "" T.unpack $ extensionFromMimeType mt
102+
'.':e | '%' `notElem` e -> '.':e
103+
_ -> maybe "" (\x -> '.':T.unpack x) $ extensionFromMimeType mt
103104

104105
-- | Lookup a media item in a 'MediaBag', returning mime type and contents.
105106
lookupMedia :: FilePath

test/Tests/MediaBag.hs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ tests = [
2020
let d = B.doc $
2121
B.para (B.image "../../test/lalune.jpg" "" mempty) <>
2222
B.para (B.image "moon.jpg" "" mempty) <>
23-
B.para (B.image "data://image/png;base64,cHJpbnQgImhlbGxvIgo=;.lua+%2f%2e%2e%2f%2e%2e%2fa%2elua" "" mempty) <>
23+
B.para (B.image "data:image/png;base64,cHJpbnQgImhlbGxvIgo=;.lua+%2f%2e%2e%2f%2e%2e%2fa%2elua" "" mempty) <>
2424
B.para (B.image "data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7" "" mempty)
2525
runIOorExplode $ do
2626
fillMediaBag d
@@ -35,4 +35,14 @@ tests = [
3535
(exists3 && not exists4)
3636
exists5 <- doesFileExist ("foo" </> "d5fceb6532643d0d84ffe09c40c481ecdf59e15a.gif")
3737
assertBool "data uri with gif is not properly decoded" exists5
38+
-- double-encoded version:
39+
let e = B.doc $
40+
B.para (B.image "data:image/png;base64,cHJpbnQgInB3bmVkIgo=;.lua+%252f%252e%252e%252f%252e%252e%252fb%252elua" "" mempty)
41+
runIOorExplode $ do
42+
fillMediaBag e
43+
extractMedia "bar" e
44+
exists6 <- doesFileExist ("bar" </> "772ceca21a2751863ec46cb23db0e7fc35b9cff8.png")
45+
exists7 <- doesFileExist "b.lua"
46+
assertBool "data uri with double-encoded malicious payload gets written outside of destination dir"
47+
(exists6 && not exists7)
3848
]

0 commit comments

Comments
 (0)