-
Notifications
You must be signed in to change notification settings - Fork 106
/
confuse.hs
41 lines (33 loc) · 1.43 KB
/
confuse.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
{-# LANGUAGE OverloadedStrings #-}
module Confuse where
import Numeric
import Data.Char
import Control.Monad
import qualified Data.ByteString as B
import qualified Data.ByteString.UTF8 as UTF8
import qualified Data.Map as Map
hx :: [Char] -> String
hx cs = let ch = (chr . fst . last . readHex $ cs) in
case ch of
'\'' -> "''"
'\\' -> "\\\\"
c -> c : []
conv :: String -> B.ByteString
conv s = B.concat ["('", UTF8.fromString i, "', '", UTF8.fromString r, "')"]
where
i :: String
i = hx s
r :: String
r = concatMap hx . words . takeWhile ((/=) ';') . tail $ dropWhile ((/=) '\t') s
convRules :: (B.ByteString, [B.ByteString]) -> B.ByteString
convRules (a, b) = B.concat ["<reset>", u a, "</reset>\n<s>", B.concat $ map u b, "</s>"]
where
u a = B.concat ["\\","u",a]
toPair :: String -> (B.ByteString, [B.ByteString])
toPair s = (UTF8.fromString $ takeWhile isHexDigit s, map UTF8.fromString . words . takeWhile ((/=) ';') . tail $ dropWhile ((/=) '\t') s)
main = do
ll <- liftM (filter (isHexDigit . head) . filter (not . null) . lines) $ readFile "confusables.txt"
B.writeFile "rules.txt" . B.intercalate "\n" . map convRules . Map.toList . Map.fromList . filter notTooLong . filter fits16bit . map toPair $ ll
where
notTooLong = (>) 6 . length . snd
fits16bit (a, b) = let f = (>) 5 . B.length in all f $ a:b