From eddedbfc14916aa06fc01ff04b38aeb30ae2e625 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Thu, 20 Jul 2023 09:26:38 -0700
Subject: [PATCH] Fix new variant of the vulnerability in CVE-2023-35936.

Guilhem Moulin noticed that the fix to CVE-2023-35936 was incomplete.
An attacker could get around it by double-encoding the malicious
extension to create or overwrite arbitrary files.

    $ echo '![](data://image/png;base64,cHJpbnQgImhlbGxvIgo=;.lua+%252f%252e%252e%252f%252e%252e%252fb%252elua)' >b.md
    $ .cabal/bin/pandoc b.md --extract-media=bar

    $ cat b.lua
    print "hello"
    $ find bar
    bar/
    bar/2a0eaa89f43fada3e6c577beea4f2f8f53ab6a1d.lua+

This commit adds a test case for this more complex attack and fixes
the vulnerability. (The fix is quite simple: if the URL-unescaped
filename or extension contains a '%', we just use the sha1 hash of the
contents as the canonical name, just as we do if the filename contains
'..'.)
---
 src/Text/Pandoc/Class/IO.hs |  2 ++
 src/Text/Pandoc/MediaBag.hs |  7 ++++---
 test/Tests/MediaBag.hs      | 12 +++++++++++-
 3 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/src/Text/Pandoc/Class/IO.hs b/src/Text/Pandoc/Class/IO.hs
index 86ed83c89582..2ae3b5cee7ca 100644
--- a/src/Text/Pandoc/Class/IO.hs
+++ b/src/Text/Pandoc/Class/IO.hs
@@ -224,6 +224,8 @@ writeMedia :: (PandocMonad m, MonadIO m)
            -> m ()
 writeMedia dir (fp, _mt, bs) = do
   -- we normalize to get proper path separators for the platform
+  -- we unescape URI encoding, but given how insertMedia
+  -- is written, we shouldn't have any % in a canonical media name...
   let fullpath = normalise $ dir </> unEscapeString fp
   liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath)
   report $ Extracting (T.pack fullpath)
diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs
index bb75f4591fa2..18a40a6dc10e 100644
--- a/src/Text/Pandoc/MediaBag.hs
+++ b/src/Text/Pandoc/MediaBag.hs
@@ -90,16 +90,17 @@ insertMedia fp mbMime contents (MediaBag mediamap) =
                       && Windows.isRelative fp''
                       && isNothing uri
                       && not (".." `isInfixOf` fp'')
+                      && '%' `notElem` fp''
                    then fp''
-                   else showDigest (sha1 contents) <> "." <> ext
+                   else showDigest (sha1 contents) <> ext
        fallback = case takeExtension fp'' of
                        ".gz" -> getMimeTypeDef $ dropExtension fp''
                        _ -> getMimeTypeDef fp''
        mt = fromMaybe fallback mbMime
        path = maybe fp'' (unEscapeString . uriPath) uri
        ext = case takeExtension path of
-               '.':e -> e
-               _ -> maybe "" T.unpack $ extensionFromMimeType mt
+               '.':e | '%' `notElem` e -> '.':e
+               _ -> maybe "" (\x -> '.':T.unpack x) $ extensionFromMimeType mt
 
 -- | Lookup a media item in a 'MediaBag', returning mime type and contents.
 lookupMedia :: FilePath
diff --git a/test/Tests/MediaBag.hs b/test/Tests/MediaBag.hs
index 65b6716d9a73..4cb4ab807dc6 100644
--- a/test/Tests/MediaBag.hs
+++ b/test/Tests/MediaBag.hs
@@ -20,7 +20,7 @@ tests = [
         let d = B.doc $
                   B.para (B.image "../../test/lalune.jpg" "" mempty) <>
                   B.para (B.image "moon.jpg" "" mempty) <>
-                  B.para (B.image "data://image/png;base64,cHJpbnQgImhlbGxvIgo=;.lua+%2f%2e%2e%2f%2e%2e%2fa%2elua" "" mempty) <>
+                  B.para (B.image "data:image/png;base64,cHJpbnQgImhlbGxvIgo=;.lua+%2f%2e%2e%2f%2e%2e%2fa%2elua" "" mempty) <>
                   B.para (B.image "data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7" "" mempty)
         runIOorExplode $ do
           fillMediaBag d
@@ -35,4 +35,14 @@ tests = [
           (exists3 && not exists4)
         exists5 <- doesFileExist ("foo" </> "d5fceb6532643d0d84ffe09c40c481ecdf59e15a.gif")
         assertBool "data uri with gif is not properly decoded" exists5
+        -- double-encoded version:
+        let e = B.doc $
+                  B.para (B.image "data:image/png;base64,cHJpbnQgInB3bmVkIgo=;.lua+%252f%252e%252e%252f%252e%252e%252fb%252elua" "" mempty)
+        runIOorExplode $ do
+          fillMediaBag e
+          extractMedia "bar" e
+        exists6 <- doesFileExist ("bar" </> "772ceca21a2751863ec46cb23db0e7fc35b9cff8.png")
+        exists7 <- doesFileExist "b.lua"
+        assertBool "data uri with double-encoded malicious payload gets written outside of destination dir"
+          (exists6 && not exists7)
   ]