Permalink
Find file
Fetching contributors…
Cannot retrieve contributors at this time
219 lines (192 sloc) 7.49 KB
module Main where
import OpenAFP
import Data.Monoid
import qualified Data.IntMap as IM
import qualified Data.ByteString as S
import qualified Data.ByteString.Unsafe as S
import qualified Data.ByteString.Internal as S
import qualified Data.ByteString.Char8 as C
--
-- import System.Posix.Resource
--
-- __1GB__ :: Integer
-- __1GB__ = 1024 * 1024 * 1024
--
-- mkLimit :: Integer -> ResourceLimits
-- mkLimit x = ResourceLimits (ResourceLimit x) (ResourceLimit x)
--
main :: IO ()
main = do
-- setResourceLimit ResourceTotalMemory (mkLimit __1GB__)
-- setResourceLimit ResourceCoreFileSize (mkLimit 0)
hSetBinaryMode stdout True
args <- getArgs
when (null args) $ do
putStrLn "Usage: afp2line input.afp ... > output.txt"
forM_ args $ \f -> do
ft <- guessFileType f
processFile ft f
data FileType = F_ASCII | F_EBCDIC | F_AFP | F_PDF | F_Unknown deriving Show
guessFileType :: FilePath -> IO FileType
guessFileType fn = do
fh <- openBinaryFile fn ReadMode
bs <- S.hGet fh 1
hClose fh
return $ if S.null bs then F_Unknown else case C.head bs of
'Z' -> F_AFP
'%' -> F_PDF
'1' -> F_ASCII
'0' -> F_ASCII
' ' -> F_ASCII
'\xF0' -> F_EBCDIC
'\xF1' -> F_EBCDIC
'@' -> F_EBCDIC
_ -> F_Unknown
processFile :: FileType -> FilePath -> IO ()
processFile F_ASCII f = do
-- ...Look at each line's first byte to determine what to output...
return ()
processFile F_AFP f = do
-- Read the first byte to determine its file type:
-- '1' indicates ASCII Plain Text line data
-- '\xF1' indicates EBCDIC line data
-- 'Z' indicates AFP file
-- '%' indicates PDF file
cs <- readAFP f
forM_ (splitRecords _PGD cs) $ \page -> do
page ..>
[ _PTX ... ptxDump
, _MCF ... mcfHandler
, _MCF1 ... mcf1Handler
]
dumpPageContent
processFile t f = warn $ "Unknown file type: " ++ show t ++ " (" ++ f ++ ")"
dumpPageContent :: IO ()
dumpPageContent = do
MkPage pg <- readIORef _CurrentPage
writeIORef _CurrentPage mempty
if IM.null pg then return () else do
forM_ (IM.elems pg) $ \(MkLine line) -> do
writeIORef _CurrentColumn 0
forM_ (IM.toAscList line) $ \(col, str) -> do
cur <- readIORef _CurrentColumn
S.putStr $ S.take (col - cur) _Spaces
S.putStr str
writeIORef _CurrentColumn (col + S.length str)
S.putStr _NewLine
S.putStr _NewPage
_Spaces, _NewLine, _NewPage :: ByteString
_Spaces = S.replicate 4096 0x20
_NewLine = C.pack "\r\n"
_NewPage = C.pack "\r\n\x0C\r\n"
{-# NOINLINE _CurrentPage #-}
_CurrentPage :: IORef Page
_CurrentPage = unsafePerformIO $ newIORef mempty
{-# NOINLINE _CurrentLine #-}
_CurrentLine :: IORef Int
_CurrentLine = unsafePerformIO $ newIORef 0
{-# NOINLINE _CurrentColumn #-}
_CurrentColumn :: IORef Int
_CurrentColumn = unsafePerformIO $ newIORef 0
{-# NOINLINE _MinFontSize #-}
_MinFontSize :: IORef Size
_MinFontSize = unsafePerformIO $ newIORef 0
lookupFontEncoding :: N1 -> IO (Maybe Encoding)
lookupFontEncoding = hashLookup _FontToEncoding
insertFonts :: [(N1, ByteString)] -> IO ()
insertFonts = mapM_ $ \(i, f) -> do
let (enc, sz) = fontInfoOf f
modifyIORef _MinFontSize $ \szMin -> case szMin of
0 -> sz
_ -> min szMin sz
hashInsert _FontToEncoding i enc
{-# NOINLINE _FontToEncoding #-}
_FontToEncoding :: HashTable N1 Encoding
_FontToEncoding = unsafePerformIO hashCreate
-- | Record font Id to Name mappings in MCF's RLI and FQN chunks.
mcfHandler :: MCF -> IO ()
mcfHandler r = do
readChunks r ..>
[ _MCF_T ... \mcf -> do
let cs = readChunks mcf
ids = [ t_rli (decodeChunk c) | c <- cs, c ~~ _T_RLI ]
fonts = [ t_fqn (decodeChunk c) | c <- cs, c ~~ _T_FQN ]
insertFonts (ids `zip` map packAStr fonts)
]
-- | Record font Id to Name mappings in MCF1's Data chunks.
mcf1Handler :: MCF1 -> IO ()
mcf1Handler r = do
insertFonts
[ (mcf1_CodedFontLocalId mcf1, packA8 $ mcf1_CodedFontName mcf1)
| Record mcf1 <- readData r
]
ptxDump :: PTX -> IO ()
ptxDump ptx = mapM_ ptxGroupDump . splitRecords _PTX_SCFL $ readChunks ptx
-- A Page is a IntMap from line-number to a map from column-number to bytestring.
newtype Page = MkPage { fromPage :: IM.IntMap Line } deriving (Show, Monoid)
newtype Line = MkLine { lineStrs :: IM.IntMap S.ByteString } deriving Show
insertText :: S.ByteString -> IO ()
insertText str = do
ln <- readIORef _CurrentLine
col <- readIORef _CurrentColumn
modifyIORef _CurrentPage $ \(MkPage pg) -> MkPage $! case IM.lookup ln pg of
Nothing -> IM.insert ln (MkLine (IM.singleton col str)) pg
Just (MkLine im) -> IM.insert ln (MkLine (IM.insert col str im)) pg
ptxGroupDump :: [PTX_] -> IO ()
ptxGroupDump (scfl:cs) = do
let scflId = ptx_scfl (decodeChunk scfl)
curEncoding <- lookupFontEncoding scflId
cs ..>
[ _PTX_TRN ... \trn -> do
-- when (ptx_scfl (decodeChunk scfl) == 2) $ do
-- hPrint stderr (ptx_trn trn)
case curEncoding of
Just CP37 -> let bstr = packAStr' (ptx_trn trn) in do
insertText bstr
modifyIORef _CurrentColumn (+ S.length bstr)
Just CP835 -> pack835 (ptx_trn trn) >>= \bstr -> do
insertText bstr
modifyIORef _CurrentColumn (+ S.length bstr)
Just CP939 -> pack939 (ptx_trn trn) >>= \bstr -> do
-- C.hPut stderr bstr
insertText bstr
modifyIORef _CurrentColumn (+ S.length bstr)
Just CP950 -> let bstr = packBuf (ptx_trn trn) in do
insertText bstr
modifyIORef _CurrentColumn (+ S.length bstr)
_ -> fail "TRN without SCFL?"
, _PTX_BLN ... \_ -> do
writeIORef _CurrentColumn 0
modifyIORef _CurrentLine (+1)
, _PTX_AMB ... movePosition Absolute _CurrentLine . ptx_amb
, _PTX_RMB ... movePosition Relative _CurrentLine . ptx_rmb
, _PTX_AMI ... movePosition Absolute _CurrentColumn . ptx_ami
, _PTX_RMI ... movePosition Relative _CurrentColumn . ptx_rmi
]
data Position = Absolute | Relative
movePosition :: Position -> IORef Int -> N2 -> IO ()
movePosition p ref n = do
minSize <- readIORef _MinFontSize
let offset = fromEnum n `div` minSize
case p of
Absolute -> writeIORef ref offset
Relative -> modifyIORef ref (+ offset)
packAStr' :: AStr -> S.ByteString
packAStr' astr = S.map (ebc2ascIsPrintW8 !) (packBuf astr)
{-# INLINE pack835 #-}
{-# INLINE pack939 #-}
pack835, pack939 :: NStr -> IO S.ByteString
pack835 = packWith convert835to950
pack939 = packWith convert939to932
{-# INLINE packWith #-}
packWith :: (Int -> Int) -> NStr -> IO S.ByteString
packWith f nstr = S.unsafeUseAsCStringLen (packBuf nstr) $ \(src, len) -> S.create len $ \target -> do
let s = castPtr src
let t = castPtr target
forM_ [0..(len `div` 2)-1] $ \i -> do
hi <- peekByteOff s (i*2) :: IO Word8
lo <- peekByteOff s (i*2+1) :: IO Word8
let ch = f (fromEnum hi * 256 + fromEnum lo)
(hi', lo') = ch `divMod` 256
pokeByteOff t (i*2) (toEnum hi' :: Word8)
pokeByteOff t (i*2+1) (toEnum lo' :: Word8)