Skip to content

Commit

Permalink
Check upper code point limit when decoding 4-byte UTF-8 characters
Browse files Browse the repository at this point in the history
  • Loading branch information
Reinier Lamers authored and glguy committed Nov 16, 2009
1 parent f8d8d70 commit 97025cd
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 6 deletions.
4 changes: 2 additions & 2 deletions Codec/Binary/UTF8/Generic.hs
Expand Up @@ -158,8 +158,8 @@ decode bs = do (c,cs) <- buncons bs
case get_follower d1 cs1 of
Just (d2, cs2) ->
case get_follower d2 cs2 of
- Just (d,_) | d >= 0x10000 -> (toEnum d, 4)
- | otherwise -> (replacement_char, 4)
+ Just (d,_) | d >= 0x10000 && d < 0x110000 -> (toEnum d, 4)
+ | otherwise -> (replacement_char, 4)
_ -> (replacement_char, 3)
_ -> (replacement_char, 2)
_ -> (replacement_char, 1)
Expand Down
4 changes: 2 additions & 2 deletions Data/ByteString/Lazy/UTF8.hs
Expand Up @@ -114,8 +114,8 @@ decode bs = do (c,cs) <- buncons bs
case get_follower d1 cs1 of
Just (d2, cs2) ->
case get_follower d2 cs2 of
- Just (d,_) | d >= 0x10000 -> (toEnum d, 4)
- | otherwise -> (replacement_char, 4)
+ Just (d,_) | d >= 0x10000 && d < 0x110000 -> (toEnum d, 4)
+ | otherwise -> (replacement_char, 4)
_ -> (replacement_char, 3)
_ -> (replacement_char, 2)
_ -> (replacement_char, 1)
Expand Down
4 changes: 2 additions & 2 deletions Data/ByteString/UTF8.hs
Expand Up @@ -113,8 +113,8 @@ decode bs = do (c,cs) <- buncons bs
case get_follower d1 cs1 of
Just (d2, cs2) ->
case get_follower d2 cs2 of
- Just (d,_) | d >= 0x10000 -> (toEnum d, 4)
- | otherwise -> (replacement_char, 4)
+ Just (d,_) | d >= 0x10000 && d < 0x110000 -> (toEnum d, 4)
+ | otherwise -> (replacement_char, 4)
_ -> (replacement_char, 3)
_ -> (replacement_char, 2)
_ -> (replacement_char, 1)
Expand Down

0 comments on commit 97025cd

Please sign in to comment.