diff --git a/base32.cabal b/base32.cabal index e973170..aed092b 100644 --- a/base32.cabal +++ b/base32.cabal @@ -1,6 +1,6 @@ cabal-version: 2.0 name: base32 -version: 0.1.1.2 +version: 0.2.0.0 synopsis: RFC 4648-compliant Base32 encodings/decodings description: RFC 4648-compliant Base32 encodings and decodings. @@ -51,12 +51,12 @@ library Data.ByteString.Base32.Internal.Utils build-depends: - base >=4.10 && <5 - , bytestring ^>=0.10 - , deepseq >=1.4.3.0 && <1.4.5.0 - , ghc-byteorder ^>=4.11.0.0 - , text ^>=1.2 - , text-short ^>=0.1 + base >=4.10 && <5 + , bytestring ^>=0.10 + , deepseq >=1.4.3.0 && <1.4.5.0 + , ghc-byteorder ^>=4.11.0.0 + , text ^>=1.2 + , text-short ^>=0.1 hs-source-dirs: src default-language: Haskell2010 @@ -72,8 +72,8 @@ test-suite tasty base >=4.10 && <5 , base32 , bytestring - , QuickCheck , memory + , QuickCheck , random-bytestring , tasty , tasty-hunit diff --git a/src/Data/ByteString/Base32/Internal.hs b/src/Data/ByteString/Base32/Internal.hs index aa1e543..9b28fa5 100644 --- a/src/Data/ByteString/Base32/Internal.hs +++ b/src/Data/ByteString/Base32/Internal.hs @@ -38,16 +38,26 @@ import System.IO.Unsafe -- Validating Base64 validateBase32 :: ByteString -> ByteString -> Bool -validateBase32 !alphabet (PS fp off l) = - accursedUnutterablePerformIO $ withForeignPtr fp $ \p -> - go (plusPtr p off) (plusPtr p (l + off)) +validateBase32 !alphabet bs@(PS _ _ l) + | l == 0 = True + | r == 0 = f bs + | r == 2 = f (BS.append bs "======") + | r == 4 = f (BS.append bs "====") + | r == 5 = f (BS.append bs "===") + | r == 7 = f (BS.append bs "=") + | otherwise = False where + r = l `rem` 8 + + f (PS fp o l') = accursedUnutterablePerformIO $ withForeignPtr fp $ \p -> + go (plusPtr p o) (plusPtr p (l' + o)) + go !p !end | p == end = return True | otherwise = do w <- peek p - let f a + let check a | a == 0x3d, plusPtr p 1 == end = True | a == 0x3d, plusPtr p 2 == end = True | a == 0x3d, plusPtr p 3 == end = True @@ -57,7 +67,7 @@ validateBase32 !alphabet (PS fp off l) = | a == 0x3d = False | otherwise = BS.elem a alphabet - if f w then go (plusPtr p 1) end else return False + if check w then go (plusPtr p 1) end else return False {-# INLINE validateBase32 #-} -- | This function checks that the last N-chars of a bytestring are '=' diff --git a/src/Data/ByteString/Base32/Internal/Loop.hs b/src/Data/ByteString/Base32/Internal/Loop.hs index 1f1aaef..d01e8c3 100644 --- a/src/Data/ByteString/Base32/Internal/Loop.hs +++ b/src/Data/ByteString/Base32/Internal/Loop.hs @@ -1,6 +1,7 @@ {-# LANGUAGE BangPatterns #-} {-# LANGUAGE CPP #-} {-# LANGUAGE MagicHash #-} +{-# LANGUAGE MultiWayIf #-} {-# LANGUAGE TypeApplications #-} module Data.ByteString.Base32.Internal.Loop ( innerLoop @@ -77,31 +78,58 @@ decodeLoop !lut !dptr !sptr !end finish = go dptr sptr 0 where lix a = w64 (aix (fromIntegral a) lut) - roll !w !acc = (acc `unsafeShiftL` 5) .|. lix w + err :: Ptr Word64 -> IO (Either Text ByteString) + err p = return . Left . T.pack + $ "invalid character at offset: " + ++ show (p `minusPtr` sptr) - err = return . Left . T.pack + padErr :: Ptr Word64 -> IO (Either Text ByteString) + padErr p = return . Left . T.pack + $ "invalid padding at offset: " + ++ show (p `minusPtr` sptr) go !dst !src !n - | plusPtr src 8 == end = finish dst (castPtr src) n + | plusPtr src 8 >= end = finish dst (castPtr src) n | otherwise = do !t <- peekWord64BE src - !w <- return - $ roll (unsafeShiftR t 0) - $ roll (unsafeShiftR t 8) - $ roll (unsafeShiftR t 16) - $ roll (unsafeShiftR t 24) - $ roll (unsafeShiftR t 32) - $ roll (unsafeShiftR t 40) - $ roll (unsafeShiftR t 48) - $ roll (unsafeShiftR t 56) - 0 - - if w /= 0xff - then do - poke @Word8 dst (fromIntegral (w `unsafeShiftR` 32)) - poke @Word32 (castPtr (plusPtr dst 1)) (byteSwap32 (fromIntegral w)) - go (plusPtr dst 5) (plusPtr src 8) (n + 5) - else err - $ "invalid character at offset: " - ++ show (src `minusPtr` sptr) + let !a = lix (unsafeShiftR t 56) + !b = lix (unsafeShiftR t 48) + !c = lix (unsafeShiftR t 40) + !d = lix (unsafeShiftR t 32) + !e = lix (unsafeShiftR t 24) + !f = lix (unsafeShiftR t 16) + !g = lix (unsafeShiftR t 8) + !h = lix t + + if + | a == 0x63 -> padErr src + | b == 0x63 -> padErr (plusPtr src 1) + | c == 0x63 -> padErr (plusPtr src 2) + | d == 0x63 -> padErr (plusPtr src 3) + | e == 0x63 -> padErr (plusPtr src 4) + | f == 0x63 -> padErr (plusPtr src 5) + | g == 0x63 -> padErr (plusPtr src 6) + | h == 0x63 -> padErr (plusPtr src 7) + | a == 0xff -> err src + | b == 0xff -> err (plusPtr src 1) + | c == 0xff -> err (plusPtr src 2) + | d == 0xff -> err (plusPtr src 3) + | e == 0xff -> err (plusPtr src 4) + | f == 0xff -> err (plusPtr src 5) + | g == 0xff -> err (plusPtr src 6) + | h == 0xff -> err (plusPtr src 7) + | otherwise -> do + + let !w = ((unsafeShiftL a 35) + .|. (unsafeShiftL b 30) + .|. (unsafeShiftL c 25) + .|. (unsafeShiftL d 20) + .|. (unsafeShiftL e 15) + .|. (unsafeShiftL f 10) + .|. (unsafeShiftL g 5) + .|. h) :: Word64 + + poke @Word32 (castPtr dst) (byteSwap32 (fromIntegral (unsafeShiftR w 8))) + poke @Word8 (plusPtr dst 4) (fromIntegral w) + go (plusPtr dst 5) (plusPtr src 8) (n + 5) diff --git a/src/Data/ByteString/Base32/Internal/Tables.hs b/src/Data/ByteString/Base32/Internal/Tables.hs index 643bacf..eb0e246 100644 --- a/src/Data/ByteString/Base32/Internal/Tables.hs +++ b/src/Data/ByteString/Base32/Internal/Tables.hs @@ -18,7 +18,7 @@ stdDecodeTable = writeNPlainForeignPtrBytes @Word8 256 [ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff , 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff , 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff - , 0xff,0xff,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff + , 0xff,0xff,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0xff,0xff,0xff,0xff,0xff,0x63,0xff,0xff , 0xff,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e , 0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0xff,0xff,0xff,0xff,0xff , 0xff,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e @@ -39,7 +39,7 @@ hexDecodeTable = writeNPlainForeignPtrBytes @Word8 256 [ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff , 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff , 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff - , 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0xff,0xff,0xff,0xff,0xff,0xff + , 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0xff,0xff,0xff,0x63,0xff,0xff , 0xff,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18 , 0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff , 0xff,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18 diff --git a/test/Main.hs b/test/Main.hs index 82a058e..b055c36 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -125,6 +125,8 @@ mkUnitTree -> TestTree mkUnitTree = mkTests "Unit tests" [ rfcVectors + , offsetVectors + , validityTests ] -- | Make unit tests for textual 'decode*With' functions @@ -218,7 +220,7 @@ prop_mem_coherence = testGroup "prop_mem_coherence" -- | RFC 4328 test vectors -- rfcVectors :: forall a b proxy. Harness a b => proxy a -> TestTree -rfcVectors _ = testGroup "RFC 4328 Test Vectors" +rfcVectors _ = testGroup "RFC 4648 Test Vectors" [ testGroup "std alphabet" [ testCaseStd "" "" , testCaseStd "f" "MY======" @@ -284,6 +286,22 @@ decodeWithVectors utf8 _ _ = testGroup "DecodeWith* unit tests" case decodeHexWith_ @a utf8 "\1079743" of Left (DecodeError _) -> return () _ -> assertFailure "decoding phase" + , testCase "decodePaddedWith non-utf8 inputs on decodeUtf8" $ do + case decodePaddedWith_ @a utf8 "\1079743" of + Left (DecodeError _) -> return () + _ -> assertFailure "decoding phase" + , testCase "decodePaddedWith valid utf8 inputs on decodeUtf8" $ do + case decodePaddedWith_ @a utf8 (encode @t "\1079743") of + Left (ConversionError _) -> return () + _ -> assertFailure "conversion phase" + , testCase "decodeUnpaddedWith non-utf8 inputs on decodeUtf8" $ do + case decodeUnpaddedWith_ @a utf8 "\1079743" of + Left (DecodeError _) -> return () + _ -> assertFailure "decoding phase" + , testCase "decodeUnpaddedWith valid utf8 inputs on decodeUtf8" $ do + case decodeUnpaddedWith_ @a utf8 (encodeNopad @t "\1079743") of + Left (ConversionError _) -> return () + _ -> assertFailure "conversion phase" , testCase "decodeHexWith valid utf8 inputs on decodeUtf8" $ do case decodeHexWith_ @a utf8 (encodeHex @t "\1079743") of Left (ConversionError _) -> return () @@ -292,15 +310,15 @@ decodeWithVectors utf8 _ _ = testGroup "DecodeWith* unit tests" case decodeHexPaddedWith_ @a utf8 "\1079743" of Left (DecodeError _) -> return () _ -> assertFailure "decoding phase" - , testCase "decodePaddedWith valid utf8 inputs on decodeUtf8" $ do + , testCase "decodeHexPaddedWith valid utf8 inputs on decodeUtf8" $ do case decodeHexPaddedWith_ @a utf8 (encodeHex @t "\1079743") of Left (ConversionError _) -> return () _ -> assertFailure "conversion phase" - , testCase "decodeUnpaddedWith non-utf8 inputs on decodeUtf8" $ do + , testCase "decodeHexUnpaddedWith non-utf8 inputs on decodeUtf8" $ do case decodeHexUnpaddedWith_ @a utf8 "\1079743" of Left (DecodeError _) -> return () _ -> assertFailure "decoding phase" - , testCase "decodeUnpaddedWith valid utf8 inputs on decodeUtf8" $ do + , testCase "decodeHexUnpaddedWith valid utf8 inputs on decodeUtf8" $ do case decodeHexUnpaddedWith_ @a utf8 (encodeHexNopad @t "\1079743") of Left (ConversionError _) -> return () _ -> assertFailure "conversion phase" @@ -310,6 +328,14 @@ decodeWithVectors utf8 _ _ = testGroup "DecodeWith* unit tests" a <- either (assertFailure . show) pure $ decode @a "MZXW6YTBOI======" b <- either (assertFailure . show) pure $ decodeWith_ @a utf8 "MZXW6YTBOI======" a @=? b + , testCase "decodePaddedWith utf8 inputs on decodeUtf8" $ do + a <- either (assertFailure . show) pure $ decodePad @a "MZXW6YTBOI======" + b <- either (assertFailure . show) pure $ decodePaddedWith_ @a utf8 "MZXW6YTBOI======" + a @=? b + , testCase "decodeUnpaddedWith utf8 inputs on decodeUtf8" $ do + a <- either (assertFailure . show) pure $ decodeNopad @a "MZXW6YTBOI" + b <- either (assertFailure . show) pure $ decodeUnpaddedWith_ @a utf8 "MZXW6YTBOI" + a @=? b , testCase "decodeHexWith utf8 inputs on decodeUtf8" $ do a <- either (assertFailure . show) pure $ decodeHex @a "CPNMUOJ1E8======" b <- either (assertFailure . show) pure $ decodeHexWith_ @a utf8 "CPNMUOJ1E8======" @@ -324,3 +350,116 @@ decodeWithVectors utf8 _ _ = testGroup "DecodeWith* unit tests" a @=? b ] ] + +-- | Validity unit tests for the URL workflow +-- +validityTests :: forall a b proxy. Harness a b => proxy a -> TestTree +validityTests _ = testGroup "Validity and correctness unit tests" + [ testGroup "Validity unit tests" + [ testCase "Hex padding tests" $ do + not (validateHex @a "C") @? "C" + validateHex @a "CO" @? "CO" + validateHex @a "CPNG" @? "CPNG" + validateHex @a "CPNMU" @? "CPNMU" + validateHex @a "CPNMUOG" @? "CPNMUOG" + validateHex @a "CPNMUOJ1" @? "CPNMUOJ1" + validateHex @a "CPNMUOJ1E8" @? "CPNMUOJ1E8" + validateHex @a "CO======" @? "CO======" + validateHex @a "CPNG====" @? "CPNG====" + validateHex @a "CPNMU===" @? "CPNMU===" + validateHex @a "CPNMUOG=" @? "CPNMUOG=" + validateHex @a "CPNMUOJ1" @? "CPNMUOJ1" + validateHex @a "CPNMUOJ1E8======" @? "CPNMUOJ1E8======" + , testCase "Std padding tests" $ do + not (validate @a "M") @? "M" + validate @a "MY" @? "MY" + validate @a "MZXQ" @? "MZXQ" + validate @a "MZXW6" @? "MZXW6" + validate @a "MZXW6YQ" @? "MZXW6YQ" + validate @a "MZXW6YTB" @? "MZXW6YTB" + validate @a "MZXW6YTBOI" @? "MZXW6YTBOI" + validate @a "MY======" @? "MY======" + validate @a "MZXQ====" @? "MZXQ====" + validate @a "MZXW6===" @? "MZXW6===" + validate @a "MZXW6YQ=" @? "MZXW6YQ=" + validate @a "MZXW6YTB" @? "MZXW6YTB" + validate @a "MZXW6YTBOI======" @? "MZXW6YTBOI======" + ] + , testGroup "Correctness unit tests" + [ testCase "Hex tests" $ do + not (correctHex @a "C") @? "C" + correctHex @a "CO" @? "CO" + correctHex @a "CPNG" @? "CPNG" + correctHex @a "CPNMU" @? "CPNMU" + correctHex @a "CPNMUOG" @? "CPNMUOG" + correctHex @a "CPNMUOJ1" @? "CPNMUOJ1" + correctHex @a "CPNMUOJ1E8" @? "CPNMUOJ1E8" + correctHex @a "CO======" @? "CO======" + correctHex @a "CPNG====" @? "CPNG====" + correctHex @a "CPNMU===" @? "CPNMU===" + correctHex @a "CPNMUOG=" @? "CPNMUOG=" + correctHex @a "CPNMUOJ1" @? "CPNMUOJ1" + correctHex @a "CPNMUOJ1E8======" @? "CPNMUOJ1E8======" + , testCase "Std tests" $ do + not (correct @a "M") @? "M" + correct @a "MY" @? "MY" + correct @a "MZXQ" @? "MZXQ" + correct @a "MZXW6" @? "MZXW6" + correct @a "MZXW6YQ" @? "MZXW6YQ" + correct @a "MZXW6YTB" @? "MZXW6YTB" + correct @a "MZXW6YTBOI" @? "MZXW6YTBOI" + correct @a "MY======" @? "MY======" + correct @a "MZXQ====" @? "MZXQ====" + correct @a "MZXW6===" @? "MZXW6===" + correct @a "MZXW6YQ=" @? "MZXW6YQ=" + correct @a "MZXW6YTB" @? "MZXW6YTB" + correct @a "MZXW6YTBOI======" @? "MZXW6YTBOI======" + ] + ] + +-- | Offset test vectors. This stresses the invalid char + incorrect padding +-- offset error messages +-- +offsetVectors :: forall a b proxy. Harness a b => proxy a -> TestTree +offsetVectors _ = testGroup "Offset tests" + [ testGroup "Hex - Invalid padding" + [ testCase "Invalid staggered padding" $ do + decodeHex @a "=PNMUOJ1E8======" @?= Left "invalid padding at offset: 0" + decodeHex @a "C=NMUOJ1E8======" @?= Left "invalid padding at offset: 1" + decodeHex @a "CP=MUOJ1E8======" @?= Left "invalid padding at offset: 2" + decodeHex @a "CPN=UOJ1E8======" @?= Left "invalid padding at offset: 3" + decodeHex @a "CPNM=OJ1E8======" @?= Left "invalid padding at offset: 4" + decodeHex @a "CPNMU=J1E8======" @?= Left "invalid padding at offset: 5" + decodeHex @a "CPNMUO=1E8======" @?= Left "invalid padding at offset: 6" + decodeHex @a "CPNMUOJ=E8======" @?= Left "invalid padding at offset: 7" + , testCase "Invalid character coverage" $ do + decodeHex @a "%PNMUOJ1E8======" @?= Left "invalid character at offset: 0" + decodeHex @a "C%NMUOJ1E8======" @?= Left "invalid character at offset: 1" + decodeHex @a "CP%MUOJ1E8======" @?= Left "invalid character at offset: 2" + decodeHex @a "CPN%UOJ1E8======" @?= Left "invalid character at offset: 3" + decodeHex @a "CPNM%OJ1E8======" @?= Left "invalid character at offset: 4" + decodeHex @a "CPNMU%J1E8======" @?= Left "invalid character at offset: 5" + decodeHex @a "CPNMUO%1E8======" @?= Left "invalid character at offset: 6" + decodeHex @a "CPNMUOJ%E8======" @?= Left "invalid character at offset: 7" + ] + , testGroup "Std - Invalid padding" + [ testCase "Invalid staggered padding" $ do + decode @a "=ZXW6YTBOI======" @?= Left "invalid padding at offset: 0" + decode @a "M=XW6YTBOI======" @?= Left "invalid padding at offset: 1" + decode @a "MZ=W6YTBOI======" @?= Left "invalid padding at offset: 2" + decode @a "MZX=6YTBOI======" @?= Left "invalid padding at offset: 3" + decode @a "MZXW=YTBOI======" @?= Left "invalid padding at offset: 4" + decode @a "MZXW6=TBOI======" @?= Left "invalid padding at offset: 5" + decode @a "MZXW6Y=BOI======" @?= Left "invalid padding at offset: 6" + decode @a "MZXW6YT=OI======" @?= Left "invalid padding at offset: 7" + , testCase "Invalid character coverage" $ do + decode @a "%ZXW6YTBOI======" @?= Left "invalid character at offset: 0" + decode @a "M%XW6YTBOI======" @?= Left "invalid character at offset: 1" + decode @a "MZ%W6YTBOI======" @?= Left "invalid character at offset: 2" + decode @a "MZX%6YTBOI======" @?= Left "invalid character at offset: 3" + decode @a "MZXW%YTBOI======" @?= Left "invalid character at offset: 4" + decode @a "MZXW6%TBOI======" @?= Left "invalid character at offset: 5" + decode @a "MZXW6Y%BOI======" @?= Left "invalid character at offset: 6" + decode @a "MZXW6YT%OI======" @?= Left "invalid character at offset: 7" + ] + ]