/
unpack-and-decrypt-pdf-file.hs
91 lines (80 loc) · 2.74 KB
/
unpack-and-decrypt-pdf-file.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
{-# LANGUAGE OverloadedStrings #-}
module Main
(
main
)
where
import Pdf.Core.Object
import Pdf.Core.Exception
import Pdf.Core.Writer
import Pdf.Document
import Pdf.Document.Internal.Types
import Data.IORef
import qualified Data.ByteString.Lazy as Lazy (ByteString)
import qualified Data.ByteString.Lazy as Lazy.ByteString
import qualified Data.Foldable as Foldable
import qualified Data.Vector as Vector
import qualified Data.IntSet as IntSet
import qualified Data.HashMap.Strict as HashMap
import Control.Monad
import Control.Exception
import System.Environment
import qualified System.IO.Streams as Streams
main :: IO ()
main = do
[input] <- getArgs
withPdfFile input $ \pdf -> do
encrypted <- isEncrypted pdf
when encrypted $ do
ok <- setUserPassword pdf defaultUserPassword
unless ok $
error "Wrong password"
Document _ tr <- document pdf
writer <- makeWriter Streams.stdout
stateRef <- newIORef IntSet.empty
let
loop (Dict vals) = Foldable.forM_ vals loop
loop (Array vals) = Vector.forM_ vals loop
loop (Ref r@(R index _)) = do
-- check that the object is not already written.
-- necessary to prevent circles
member <- IntSet.member index <$> readIORef stateRef
if member
then return ()
else do
o <- lookupObject pdf r
case o of
Stream s -> do
(d, dat) <- loadStream pdf r s
writeStream writer r d dat
_ -> writeObject writer r o
modifyIORef stateRef $ IntSet.insert index
loop o
loop (Stream (S d _)) = loop (Dict d)
loop _ = return ()
writeHeader writer
-- traverse all the objects starting from trailer
-- and write out all the indirect objects found
loop (Dict tr)
-- There are no more xrefs, so clean prev key
writeXRefTable writer 0 (HashMap.delete "Prev" tr)
loadStream :: Pdf -> Ref -> Stream -> IO (Dict, Lazy.ByteString)
loadStream pdf ref s = do
res <- loadDecodedStream pdf ref s
`catch` \Corrupted{} -> loadRawStream pdf ref s
return res
loadDecodedStream :: Pdf -> Ref -> Stream -> IO (Dict, Lazy.ByteString)
loadDecodedStream pdf ref s@(S d _) = do
is <- streamContent pdf ref s
cont <- Lazy.ByteString.fromChunks <$> Streams.toList is
-- update length and remove filter
let d' = HashMap.insert "Length" (Number len)
. HashMap.delete "Filter"
$ d
len = fromIntegral (Lazy.ByteString.length cont)
return (d', cont)
loadRawStream :: Pdf -> Ref -> Stream -> IO (Dict, Lazy.ByteString)
loadRawStream pdf r s@(S d _) = do
is <- rawStreamContent pdf r s
cont <- Lazy.ByteString.fromChunks <$> Streams.toList is
return (d, cont)