-
Notifications
You must be signed in to change notification settings - Fork 0
/
afp2line.hs
218 lines (192 loc) · 7.49 KB
/
afp2line.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
module Main where
import OpenAFP
import Data.Monoid
import qualified Data.IntMap as IM
import qualified Data.ByteString as S
import qualified Data.ByteString.Unsafe as S
import qualified Data.ByteString.Internal as S
import qualified Data.ByteString.Char8 as C
--
-- import System.Posix.Resource
--
-- __1GB__ :: Integer
-- __1GB__ = 1024 * 1024 * 1024
--
-- mkLimit :: Integer -> ResourceLimits
-- mkLimit x = ResourceLimits (ResourceLimit x) (ResourceLimit x)
--
main :: IO ()
main = do
-- setResourceLimit ResourceTotalMemory (mkLimit __1GB__)
-- setResourceLimit ResourceCoreFileSize (mkLimit 0)
hSetBinaryMode stdout True
args <- getArgs
when (null args) $ do
putStrLn "Usage: afp2line input.afp ... > output.txt"
forM_ args $ \f -> do
ft <- guessFileType f
processFile ft f
data FileType = F_ASCII | F_EBCDIC | F_AFP | F_PDF | F_Unknown deriving Show
guessFileType :: FilePath -> IO FileType
guessFileType fn = do
fh <- openBinaryFile fn ReadMode
bs <- S.hGet fh 1
hClose fh
return $ if S.null bs then F_Unknown else case C.head bs of
'Z' -> F_AFP
'%' -> F_PDF
'1' -> F_ASCII
'0' -> F_ASCII
' ' -> F_ASCII
'\xF0' -> F_EBCDIC
'\xF1' -> F_EBCDIC
'@' -> F_EBCDIC
_ -> F_Unknown
processFile :: FileType -> FilePath -> IO ()
processFile F_ASCII f = do
-- ...Look at each line's first byte to determine what to output...
return ()
processFile F_AFP f = do
-- Read the first byte to determine its file type:
-- '1' indicates ASCII Plain Text line data
-- '\xF1' indicates EBCDIC line data
-- 'Z' indicates AFP file
-- '%' indicates PDF file
cs <- readAFP f
forM_ (splitRecords _PGD cs) $ \page -> do
page ..>
[ _PTX ... ptxDump
, _MCF ... mcfHandler
, _MCF1 ... mcf1Handler
]
dumpPageContent
processFile t f = warn $ "Unknown file type: " ++ show t ++ " (" ++ f ++ ")"
dumpPageContent :: IO ()
dumpPageContent = do
MkPage pg <- readIORef _CurrentPage
writeIORef _CurrentPage mempty
if IM.null pg then return () else do
forM_ (IM.elems pg) $ \(MkLine line) -> do
writeIORef _CurrentColumn 0
forM_ (IM.toAscList line) $ \(col, str) -> do
cur <- readIORef _CurrentColumn
S.putStr $ S.take (col - cur) _Spaces
S.putStr str
writeIORef _CurrentColumn (col + S.length str)
S.putStr _NewLine
S.putStr _NewPage
_Spaces, _NewLine, _NewPage :: ByteString
_Spaces = S.replicate 4096 0x20
_NewLine = C.pack "\r\n"
_NewPage = C.pack "\r\n\x0C\r\n"
{-# NOINLINE _CurrentPage #-}
_CurrentPage :: IORef Page
_CurrentPage = unsafePerformIO $ newIORef mempty
{-# NOINLINE _CurrentLine #-}
_CurrentLine :: IORef Int
_CurrentLine = unsafePerformIO $ newIORef 0
{-# NOINLINE _CurrentColumn #-}
_CurrentColumn :: IORef Int
_CurrentColumn = unsafePerformIO $ newIORef 0
{-# NOINLINE _MinFontSize #-}
_MinFontSize :: IORef Size
_MinFontSize = unsafePerformIO $ newIORef 0
lookupFontEncoding :: N1 -> IO (Maybe Encoding)
lookupFontEncoding = hashLookup _FontToEncoding
insertFonts :: [(N1, ByteString)] -> IO ()
insertFonts = mapM_ $ \(i, f) -> do
let (enc, sz) = fontInfoOf f
modifyIORef _MinFontSize $ \szMin -> case szMin of
0 -> sz
_ -> min szMin sz
hashInsert _FontToEncoding i enc
{-# NOINLINE _FontToEncoding #-}
_FontToEncoding :: HashTable N1 Encoding
_FontToEncoding = unsafePerformIO hashCreate
-- | Record font Id to Name mappings in MCF's RLI and FQN chunks.
mcfHandler :: MCF -> IO ()
mcfHandler r = do
readChunks r ..>
[ _MCF_T ... \mcf -> do
let cs = readChunks mcf
ids = [ t_rli (decodeChunk c) | c <- cs, c ~~ _T_RLI ]
fonts = [ t_fqn (decodeChunk c) | c <- cs, c ~~ _T_FQN ]
insertFonts (ids `zip` map packAStr fonts)
]
-- | Record font Id to Name mappings in MCF1's Data chunks.
mcf1Handler :: MCF1 -> IO ()
mcf1Handler r = do
insertFonts
[ (mcf1_CodedFontLocalId mcf1, packA8 $ mcf1_CodedFontName mcf1)
| Record mcf1 <- readData r
]
ptxDump :: PTX -> IO ()
ptxDump ptx = mapM_ ptxGroupDump . splitRecords _PTX_SCFL $ readChunks ptx
-- A Page is a IntMap from line-number to a map from column-number to bytestring.
newtype Page = MkPage { fromPage :: IM.IntMap Line } deriving (Show, Monoid)
newtype Line = MkLine { lineStrs :: IM.IntMap S.ByteString } deriving Show
insertText :: S.ByteString -> IO ()
insertText str = do
ln <- readIORef _CurrentLine
col <- readIORef _CurrentColumn
modifyIORef _CurrentPage $ \(MkPage pg) -> MkPage $! case IM.lookup ln pg of
Nothing -> IM.insert ln (MkLine (IM.singleton col str)) pg
Just (MkLine im) -> IM.insert ln (MkLine (IM.insert col str im)) pg
ptxGroupDump :: [PTX_] -> IO ()
ptxGroupDump (scfl:cs) = do
let scflId = ptx_scfl (decodeChunk scfl)
curEncoding <- lookupFontEncoding scflId
cs ..>
[ _PTX_TRN ... \trn -> do
-- when (ptx_scfl (decodeChunk scfl) == 2) $ do
-- hPrint stderr (ptx_trn trn)
case curEncoding of
Just CP37 -> let bstr = packAStr' (ptx_trn trn) in do
insertText bstr
modifyIORef _CurrentColumn (+ S.length bstr)
Just CP835 -> pack835 (ptx_trn trn) >>= \bstr -> do
insertText bstr
modifyIORef _CurrentColumn (+ S.length bstr)
Just CP939 -> pack939 (ptx_trn trn) >>= \bstr -> do
-- C.hPut stderr bstr
insertText bstr
modifyIORef _CurrentColumn (+ S.length bstr)
Just CP950 -> let bstr = packBuf (ptx_trn trn) in do
insertText bstr
modifyIORef _CurrentColumn (+ S.length bstr)
_ -> fail "TRN without SCFL?"
, _PTX_BLN ... \_ -> do
writeIORef _CurrentColumn 0
modifyIORef _CurrentLine (+1)
, _PTX_AMB ... movePosition Absolute _CurrentLine . ptx_amb
, _PTX_RMB ... movePosition Relative _CurrentLine . ptx_rmb
, _PTX_AMI ... movePosition Absolute _CurrentColumn . ptx_ami
, _PTX_RMI ... movePosition Relative _CurrentColumn . ptx_rmi
]
data Position = Absolute | Relative
movePosition :: Position -> IORef Int -> N2 -> IO ()
movePosition p ref n = do
minSize <- readIORef _MinFontSize
let offset = fromEnum n `div` minSize
case p of
Absolute -> writeIORef ref offset
Relative -> modifyIORef ref (+ offset)
packAStr' :: AStr -> S.ByteString
packAStr' astr = S.map (ebc2ascIsPrintW8 !) (packBuf astr)
{-# INLINE pack835 #-}
{-# INLINE pack939 #-}
pack835, pack939 :: NStr -> IO S.ByteString
pack835 = packWith convert835to950
pack939 = packWith convert939to932
{-# INLINE packWith #-}
packWith :: (Int -> Int) -> NStr -> IO S.ByteString
packWith f nstr = S.unsafeUseAsCStringLen (packBuf nstr) $ \(src, len) -> S.create len $ \target -> do
let s = castPtr src
let t = castPtr target
forM_ [0..(len `div` 2)-1] $ \i -> do
hi <- peekByteOff s (i*2) :: IO Word8
lo <- peekByteOff s (i*2+1) :: IO Word8
let ch = f (fromEnum hi * 256 + fromEnum lo)
(hi', lo') = ch `divMod` 256
pokeByteOff t (i*2) (toEnum hi' :: Word8)
pokeByteOff t (i*2+1) (toEnum lo' :: Word8)