Permalink
Switch branches/tags
Nothing to show
Find file
Fetching contributors…
Cannot retrieve contributors at this time
352 lines (295 sloc) 11.1 KB
{-# LANGUAGE StandaloneDeriving, GeneralizedNewtypeDeriving #-}
{-# LANGUAGE ImplicitParams, ExistentialQuantification, PatternGuards #-}
module Main where
import OpenAFP
import Control.Applicative
import System.Exit
import System.Environment
import Text.LineToPDF.Internals hiding (Encoding)
-- import System.Posix.Resource
import qualified Data.ByteString as S
import qualified Data.ByteString.Unsafe as S
import qualified Data.ByteString.Internal as S
import qualified Data.ByteString.Char8 as C
import qualified Text.LineToPDF.Internals as PDF
deriving instance Eq Encoding
deriving instance Ord Encoding
deriving instance Ix Encoding
deriving instance Bounded Encoding
__1GB__ :: Integer
__1GB__ = 1024 * 1024 * 1024
-- mkLimit :: Integer -> ResourceLimits
-- mkLimit x = ResourceLimits (ResourceLimit x) (ResourceLimit x)
main :: IO ()
main = do
-- setResourceLimit ResourceTotalMemory (mkLimit __1GB__)
-- setResourceLimit ResourceCoreFileSize (mkLimit 0)
hSetBinaryMode stdout True
args <- getArgs
when (null args) $ do
putStrLn "Usage: afp2line2pdf input.afp > output.pdf"
forM_ args $ \f -> do
ft <- guessFileType f
processFile ft f
data FileType = F_ASCII | F_EBCDIC | F_AFP | F_PDF | F_Unknown deriving Show
-- | Read the first byte to determine its file type:
--
-- * '1' indicates ASCII Plain Text line data
--
-- * '\xF1' indicates EBCDIC line data
--
-- * 'Z' indicates AFP file
--
-- * '%' indicates PDF file
--
guessFileType :: FilePath -> IO FileType
guessFileType fn = do
fh <- openBinaryFile fn ReadMode
bs <- S.hGet fh 1
hClose fh
return $ if S.null bs then F_Unknown else case C.head bs of
'Z' -> F_AFP
'%' -> F_PDF
'1' -> F_ASCII
'0' -> F_ASCII
' ' -> F_ASCII
'\xF0' -> F_EBCDIC
'\xF1' -> F_EBCDIC
'@' -> F_EBCDIC
_ -> F_Unknown
imageChunkTypes :: [ChunkType]
imageChunkTypes =
[ chunkTypeOf _BII
, chunkTypeOf _BIM
, chunkTypeOf _MIO
, chunkTypeOf _IRD
, chunkTypeOf _IPD
]
_blanks :: [C.ByteString]
_blanks = map C.pack
[ "BLANK"
, "BLN"
, "blank"
, "bln"
]
isBlank :: [AFP_] -> Bool
isBlank cs = case find (~~ _BR) cs of
Just c -> let str = S.drop 2 (packAStr $ br (decodeChunk c)) in
any (`S.isPrefixOf` str) _blanks
_ -> False
processFile :: FileType -> FilePath -> IO ()
processFile F_ASCII _ = do
-- ...Look at each line's first byte to determine what to output...
return ()
processFile F_AFP f = do
cs <- readAFP f
env <- getEnvironment
case lookup "AFP2PDF2LINE_SKIP_IMAGE" env of
Just "1" | not (isBlank cs), any ((`elem` imageChunkTypes) . chunkType) cs -> do
-- We can't process image data, sorry.
exitWith (ExitFailure 1)
_ -> return ()
hSetBinaryMode stdout True
pr$ "%PDF-1.2\n" ++ "%\xE2\xE3\xCF\xD3\n"
(info, root, tPages, resources) <- writeHeader [Latin, Big5, ShiftJIS]
forM_ (filter (~~ _PGD) cs) $ \c -> do
let pgd = decodeChunk c
xSize = toPt $ pgd_XPageSize pgd
ySize = toPt $ pgd_YPageSize pgd
modifyIORef __MAX_WIDTH__ (max xSize)
modifyIORef __MAX_HEIGHT__ (max ySize)
forM_ (tail $ splitRecords _BPG cs) $ \pageChunks -> do
-- Start page
markObj $ \obj -> do
modifyRef __PAGE__ (obj:)
pr$ "/Type/Page"
pr$ "/Parent " ++ show tPages ++ " 0 R"
pr$ "/Resources " ++ show resources ++ " 0 R"
pr$ "/Contents " ++ show (succ obj) ++ " 0 R"
pr$ "/Rotate 0"
obj <- incrObj
markLocation obj
pr$ show obj ++ " 0 obj" ++ "<<"
pr$ "/Length " ++ show (succ obj) ++ " 0 R"
pr$ ">>" ++ "stream\n"
streamStart <- currentLocation
pr$ "BT\n";
pageChunks ..>
[ _PTX ... ptxDump
, _MCF ... mcfHandler
, _MCF1 ... mcf1Handler
]
-- End Page
pr$ "ET\n"
streamEnd <- currentLocation
pr$ "endstream\n" ++ "endobj\n"
obj' <- incrObj
markLocation obj'
pr$ show obj' ++ " 0 obj\n"
++ show (streamEnd - streamStart) ++ "\n"
++ "endobj\n"
pageObjs <-reverse <$> readRef __PAGE__
markLocation root
writeObj root $ do
pr$ "/Type/Catalog" ++ "/Pages "
pr$ show tPages ++ " 0 R"
maxPageWidth <- readIORef __MAX_WIDTH__
maxPageHeight <- readIORef __MAX_HEIGHT__
markLocation tPages
writeObj tPages $ do
pr$ "/Type/Pages" ++ "/Count "
pr$ show (length pageObjs)
++ "/MediaBox[0 0 "
++ show maxPageWidth ++ " " ++ show maxPageHeight
pr$ "]" ++ "/Kids["
pr$ concatMap ((++ " 0 R ") . show) pageObjs
pr$ "]"
xfer <- currentLocation
objCount <- incrObj
pr$ "xref\n" ++ "0 " ++ show objCount ++ "\n"
++ "0000000000 65535 f \r"
writeLocations
pr$ "trailer\n" ++ "<<" ++ "/Size "
pr$ show objCount
pr$ "/Root " ++ show root ++ " 0 R"
pr$ "/Info " ++ show info ++ " 0 R"
pr$ ">>\n" ++ "startxref\n"
pr$ show xfer ++ "\n" ++ "%%EOF\n"
processFile t f = warn $ "Unknown file type: " ++ show t ++ " (" ++ f ++ ")"
{-# NOINLINE __MAX_WIDTH__ #-}
__MAX_WIDTH__ :: IORef Float
__MAX_WIDTH__ = unsafePerformIO $ newIORef 0
{-# NOINLINE __MAX_HEIGHT__ #-}
__MAX_HEIGHT__ :: IORef Float
__MAX_HEIGHT__ = unsafePerformIO $ newIORef 0
{-# NOINLINE _CurrentLine #-}
_CurrentLine :: IORef Float
_CurrentLine = unsafePerformIO $ newIORef 0
{-# NOINLINE _CurrentColumn #-}
_CurrentColumn :: IORef Float
_CurrentColumn = unsafePerformIO $ newIORef 0
{-# NOINLINE _MaxColumn #-}
_MaxColumn :: IORef Int
_MaxColumn = unsafePerformIO $ newIORef 0
{-# NOINLINE _MaxLine #-}
_MaxLine :: IORef Int
_MaxLine = unsafePerformIO $ newIORef 0
{-# NOINLINE _MaxEncoding #-}
_MaxEncoding :: IORef Encoding
_MaxEncoding = unsafePerformIO $ newIORef CP37
{-# NOINLINE _MinFontSize #-}
_MinFontSize :: IORef Size
_MinFontSize = unsafePerformIO $ newIORef 12
lookupFontEncoding :: N1 -> IO (Maybe Encoding)
lookupFontEncoding = hashLookup _FontToEncoding
insertFonts :: [(N1, ByteString)] -> IO ()
insertFonts = mapM_ $ \(i, f) -> do
let (enc, sz) = fontInfoOf f
modifyIORef _MinFontSize $ \szMin -> case szMin of
0 -> sz
_ -> min szMin sz
modifyIORef _MaxEncoding (max enc)
hashInsert _FontToEncoding i enc
{-# NOINLINE _FontToEncoding #-}
_FontToEncoding :: HashTable N1 Encoding
_FontToEncoding = unsafePerformIO hashCreate
-- | Record font Id to Name mappings in MCF's RLI and FQN chunks.
mcfHandler :: MCF -> IO ()
mcfHandler r = do
readChunks r ..>
[ _MCF_T ... \mcf -> do
let cs = readChunks mcf
ids = [ t_rli (decodeChunk c) | c <- cs, c ~~ _T_RLI ]
fonts = [ t_fqn (decodeChunk c) | c <- cs, c ~~ _T_FQN ]
insertFonts (ids `zip` map packAStr fonts)
]
-- | Record font Id to Name mappings in MCF1's Data chunks.
mcf1Handler :: MCF1 -> IO ()
mcf1Handler r = do
insertFonts
[ (mcf1_CodedFontLocalId mcf1, packA8 $ mcf1_CodedFontName mcf1)
| Record mcf1 <- readData r
]
ptxDump :: PTX -> IO ()
ptxDump ptx = mapM_ ptxGroupDump . splitRecords _PTX_SCFL $ readChunks ptx
convertEncoding :: Encoding -> PDF.Encoding
convertEncoding CP37 = Latin
convertEncoding CP835 = Big5
convertEncoding CP939 = ShiftJIS
convertEncoding CP950 = Big5
ptxGroupDump :: [PTX_] -> IO ()
ptxGroupDump (scfl:cs) | scfl ~~ _PTX_SCFL = do
let scflId = ptx_scfl (decodeChunk scfl)
rv <- lookupFontEncoding scflId
case rv of
Nothing -> return ()
Just curEncoding -> do
height <- readIORef __MAX_HEIGHT__
curSize <- readIORef _MinFontSize
let display (enc, getBStr) = do
font <- case enc of
Latin -> do
(fontOf . convertEncoding) <$> readIORef _MaxEncoding
_ -> return (fontOf enc)
pr$ "/F" ++ font ++ " " ++ show curSize ++ " Tf\n"
pr$ show curSize ++ " TL\n"
col <- readIORef _CurrentColumn
ln <- readIORef _CurrentLine
pr$ "1 0 0 1 " ++ show (col - 1) ++ " " ++ show (height - ln - 1) ++ " Tm("
let escape ch = C.intercalate (C.pack ['\\', ch]) . C.split ch
escapeTxt = escape '(' . escape ')' . escape '\\'
origBStr <- getBStr
let origLen = S.length origBStr
escapedLen = S.length escapedBStr
escapedBStr = escapeTxt origBStr
C.putStr escapedBStr
modifyRef __POS__ (+ escapedLen)
pr$ ")Tj\n"
modifyIORef _CurrentColumn (+ (realToFrac (origLen * curSize) * 0.5))
cs ..>
[ _PTX_TRN ... \trn -> display $ case curEncoding of
CP37 -> (Latin, return $ packAStr' (ptx_trn trn))
CP835 -> (Big5, pack835 (ptx_trn trn))
CP939 -> (ShiftJIS, pack939 (ptx_trn trn))
CP950 -> (Big5, return $ packBuf (ptx_trn trn))
, _PTX_BLN ... \_ -> do
writeIORef _CurrentColumn 0 -- XXX
modifyIORef _CurrentLine (+ realToFrac curSize)
, _PTX_AMB ... \x -> do
-- hPrint stderr ("AMB", fromEnum $ ptx_amb x)
movePosition Absolute _CurrentLine . ptx_amb $ x
, _PTX_RMB ... \x -> do
-- hPrint stderr ("RMB", fromEnum $ ptx_rmb x)
movePosition Relative _CurrentLine . ptx_rmb $ x
, _PTX_AMI ... movePosition Absolute _CurrentColumn . ptx_ami
, _PTX_RMI ... movePosition Relative _CurrentColumn . ptx_rmi
]
ptxGroupDump _ = return ()
data Position = Absolute | Relative
movePosition :: Position -> IORef Float -> N2 -> IO ()
movePosition p ref n = do
case p of
Absolute -> writeIORef ref (toPt n)
Relative -> modifyIORef ref (+ (toPt n))
toPt :: Enum a => a -> Float
toPt x = realToFrac (fromEnum x) * 0.3
packAStr' :: AStr -> S.ByteString
packAStr' astr = S.map (ebc2ascIsPrintW8 !) (packBuf astr)
{-# INLINE pack835 #-}
{-# INLINE pack939 #-}
pack835, pack939 :: NStr -> IO S.ByteString
pack835 = packWith convert835to950
pack939 = packWith convert939to932
{-# INLINE packWith #-}
packWith :: (Int -> Int) -> NStr -> IO S.ByteString
packWith f nstr = S.unsafeUseAsCStringLen (packBuf nstr) $ \(src, len) -> S.create len $ \target -> do
let s = castPtr src
let t = castPtr target
forM_ [0..(len `div` 2)-1] $ \i -> do
hi <- peekByteOff s (i*2) :: IO Word8
lo <- peekByteOff s (i*2+1) :: IO Word8
let ch = f (fromEnum hi * 256 + fromEnum lo)
(hi', lo') = ch `divMod` 256
pokeByteOff t (i*2) (toEnum hi' :: Word8)
pokeByteOff t (i*2+1) (toEnum lo' :: Word8)
-- Target size = AFP size / 3