Skip to content

Commit

Permalink
Decode UTF8 for unEscapeString
Browse files Browse the repository at this point in the history
Fixes #86
  • Loading branch information
singpolyma committed Feb 6, 2013
1 parent bc0bdf4 commit f2168b1
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 4 deletions.
47 changes: 44 additions & 3 deletions Network/URI.hs
Original file line number Diff line number Diff line change
Expand Up @@ -970,9 +970,50 @@ escapeURIString p s = concatMap (escapeURIChar p) s
--
unEscapeString :: String -> String
unEscapeString [] = ""
unEscapeString ('%':x1:x2:s) | isHexDigit x1 && isHexDigit x2 =
chr (digitToInt x1 * 16 + digitToInt x2) : unEscapeString s
unEscapeString (c:s) = c : unEscapeString s
unEscapeString s@(c:cs) = case unEscapeByte s of
Just (byte, rest) -> unEscapeUtf8 byte rest
Nothing -> c : unEscapeString cs

unEscapeByte :: String -> Maybe (Int, String)
unEscapeByte ('%':x1:x2:s) | isHexDigit x1 && isHexDigit x2 =
Just (digitToInt x1 * 16 + digitToInt x2, s)
unEscapeByte _ = Nothing

-- Adapted from http://hackage.haskell.org/package/utf8-string
-- by Eric Mertens, BSD3
unEscapeUtf8 :: Int -> String -> String
unEscapeUtf8 c rest
| c < 0x80 = chr c : unEscapeString rest
| c < 0xc0 = replacement_character : unEscapeString rest
| c < 0xe0 = multi1
| c < 0xf0 = multi_byte 2 0xf 0x800
| c < 0xf8 = multi_byte 3 0x7 0x10000
| c < 0xfc = multi_byte 4 0x3 0x200000
| c < 0xfe = multi_byte 5 0x1 0x4000000
| otherwise = replacement_character : unEscapeString rest
where
replacement_character = '\xfffd'
multi1 = case unEscapeByte rest of
Just (c1, ds) | c1 .&. 0xc0 == 0x80 ->
let d = ((fromEnum c .&. 0x1f) `shiftL` 6) .|. fromEnum (c1 .&. 0x3f)
in if d >= 0x000080 then toEnum d : unEscapeString ds
else replacement_character : unEscapeString ds
_ -> replacement_character : unEscapeString rest

multi_byte i mask overlong =
aux i rest (unEscapeByte rest) (c .&. mask)
where
aux 0 rs _ acc
| overlong <= acc && acc <= 0x10ffff &&
(acc < 0xd800 || 0xdfff < acc) &&
(acc < 0xfffe || 0xffff < acc) = chr acc : unEscapeString rs
| otherwise = replacement_character : unEscapeString rs

aux n _ (Just (r, rs)) acc
| r .&. 0xc0 == 0x80 = aux (n-1) rs (unEscapeByte rs)
$! shiftL acc 6 .|. (r .&. 0x3f)

aux _ rs _ _ = replacement_character : unEscapeString rs

------------------------------------------------------------
-- Resolving a relative URI relative to a base URI
Expand Down
3 changes: 2 additions & 1 deletion network.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ test-suite uri
HUnit,
network,
test-framework,
test-framework-hunit
test-framework-hunit,
test-framework-quickcheck2

source-repository head
type: git
Expand Down
8 changes: 8 additions & 0 deletions tests/uri001.hs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ import Data.Maybe (fromJust)
import System.IO (openFile, IOMode(WriteMode), hClose)
import qualified Test.Framework as TF
import qualified Test.Framework.Providers.HUnit as TF
import qualified Test.Framework.Providers.QuickCheck2 as TF

-- Test supplied string for valid URI reference syntax
-- isValidURIRef :: String -> Bool
Expand Down Expand Up @@ -1103,13 +1104,20 @@ testEscapeURIString06 = testEq "testEscapeURIString06"
"hello%C3%B8%C2%A9%E6%97%A5%E6%9C%AC"
(escapeURIString isUnescapedInURIComponent "helloø©日本")

propEscapeUnEscapeLoop :: String -> Bool
propEscapeUnEscapeLoop s = s == (unEscapeString $! escaped)
where
escaped = escapeURIString (const False) s
{-# NOINLINE escaped #-}

testEscapeURIString = TF.testGroup "testEscapeURIString"
[ TF.testCase "testEscapeURIString01" testEscapeURIString01
, TF.testCase "testEscapeURIString02" testEscapeURIString02
, TF.testCase "testEscapeURIString03" testEscapeURIString03
, TF.testCase "testEscapeURIString04" testEscapeURIString04
, TF.testCase "testEscapeURIString05" testEscapeURIString05
, TF.testCase "testEscapeURIString06" testEscapeURIString06
, TF.testProperty "propEscapeUnEscapeLoop" propEscapeUnEscapeLoop
]

-- URI string normalization tests
Expand Down

0 comments on commit f2168b1

Please sign in to comment.