Skip to content

Commit

Permalink
db-sync: Reject TxMetadata objects containing NUL characters
Browse files Browse the repository at this point in the history
TxMetadata is stored as JSON and that JSON is stored in a 'jsonb' column
in PostgreSQL. However, there are limitations to that Postgres 'jsonb'
data type. Specifically, it cannot contain Unicode NUL characters. This
temporary fix simply drops TxMetadata JSON objects that would otherwise
be rejected by Postgres. Hopefully a better solution will be
dreamt up and implemented later.

Temporary workaround fix for:
#297
  • Loading branch information
erikd committed Sep 21, 2020
1 parent e8825e3 commit c7ac532
Showing 1 changed file with 36 additions and 9 deletions.
45 changes: 36 additions & 9 deletions cardano-db-sync/src/Cardano/DbSync/Era/Shelley/Insert.hs
Expand Up @@ -9,11 +9,13 @@

module Cardano.DbSync.Era.Shelley.Insert
( insertShelleyBlock
, containsUnicodeNul
, safeDecodeUtf8
) where

import Cardano.Prelude

import Cardano.BM.Trace (Trace, logDebug, logError, logInfo)
import Cardano.BM.Trace (Trace, logDebug, logError, logInfo, logWarning)

import Cardano.Db (DbWord64 (..))

Expand All @@ -38,9 +40,12 @@ import Cardano.DbSync.Util
import Cardano.Slotting.Slot (EpochNo (..), EpochSize (..))

import qualified Data.Aeson as Aeson
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Lazy.Char8 as LBS
import qualified Data.Map.Strict as Map
import qualified Data.Text as Text
import qualified Data.Text.Encoding as Text
import qualified Data.Text.Encoding.Error as Text

import Database.Persist.Sql (SqlBackend)

Expand Down Expand Up @@ -481,17 +486,39 @@ insertTxMetadata
:: (MonadBaseControl IO m, MonadIO m)
=> Trace IO Text -> DB.TxId -> Shelley.MetaData
-> ExceptT DbSyncNodeError (ReaderT SqlBackend m) ()
insertTxMetadata _tracer txId (Shelley.MetaData mdmap) =
insertTxMetadata tracer txId (Shelley.MetaData mdmap) =
mapM_ insert $ Map.toList mdmap
where
insert
:: (MonadBaseControl IO m, MonadIO m)
=> (Word64, Shelley.MetaDatum)
-> ExceptT DbSyncNodeError (ReaderT SqlBackend m) ()
insert (key, md) =
void . lift . DB.insertTxMetadata $
DB.TxMetadata
{ DB.txMetadataKey = DbWord64 key
, DB.txMetadataJson = Text.decodeUtf8 . LBS.toStrict $ Aeson.encode (jsonFromMetadataValue md)
, DB.txMetadataTxId = txId
}
-- Insert a single (key, metadatum) pair for the transaction. The metadatum
-- is rendered to JSON text first; if that text cannot be decoded, or if
-- 'containsUnicodeNul' flags it, a warning is logged and nothing is inserted
-- for this key.
insert (key, md) = do
-- Serialise the metadatum to JSON and make the bytes strict.
let jsonbs = LBS.toStrict $ Aeson.encode (jsonFromMetadataValue md)
-- 'safeDecodeUtf8' returns a decoding failure as a 'Left' value.
ejson <- liftIO $ safeDecodeUtf8 jsonbs
case ejson of
Left err ->
-- Decoding failed: log a warning and skip this entry.
liftIO . logWarning tracer $ mconcat
[ "insertTxMetadata: Could not decode to UTF8: ", textShow err ]
Right json -> do
-- See https://github.com/input-output-hk/cardano-db-sync/issues/297
-- Entries flagged by 'containsUnicodeNul' are logged and dropped rather
-- than inserted.
if containsUnicodeNul json
then liftIO $ logWarning tracer "insertTxMetadata: dropped due to a Unicode NUL character."
else
-- Store the decoded JSON text together with its key and owning tx id;
-- the value returned by the insert is discarded.
void . lift . DB.insertTxMetadata $
DB.TxMetadata
{ DB.txMetadataKey = DbWord64 key
, DB.txMetadataJson = json
, DB.txMetadataTxId = txId
}

-- | Decode a strict 'ByteString' as UTF-8 without letting a decoding
-- exception escape to the caller.
--
-- Input that contains a zero byte is rejected up front with a
-- 'Text.DecodeError' whose description is the unpacked input and whose
-- offending byte is @0@. Any other input is decoded strictly, and a decoding
-- failure is returned as a 'Left' value.
safeDecodeUtf8 :: ByteString -> IO (Either Text.UnicodeException Text)
safeDecodeUtf8 bs =
  if BS.elem '\NUL' bs
    then pure (Left nulError)
    -- 'evaluate' forces the result here, so the decode runs inside this action.
    else evaluate (Text.decodeUtf8' bs)
  where
    -- Error reported when the raw bytes contain a zero byte.
    nulError :: Text.UnicodeException
    nulError = Text.DecodeError (BS.unpack bs) (Just 0)

-- | Report whether JSON text contains the escape sequence for a Unicode NUL
-- character (a backslash followed by @u0000@), which a PostgreSQL @jsonb@
-- column cannot store.
--
-- The text is scanned one escape at a time so that:
--
-- * escapes that merely share the @u000@ prefix (for example the escape for
--   U+0001) are not reported, and
-- * an escaped backslash followed by the literal characters @u0000@ is not
--   mistaken for a NUL escape.
--
-- NOTE(review): this assumes the input is well-formed JSON text, where every
-- backslash starts an escape sequence -- confirm against callers.
containsUnicodeNul :: Text -> Bool
containsUnicodeNul = go
  where
    go :: Text -> Bool
    go txt =
      -- Jump to the next backslash; everything before it is unescaped.
      case Text.uncons (Text.dropWhile (/= '\\') txt) of
        Nothing -> False
        Just (_backslash, escaped)
          | Text.pack "u0000" `Text.isPrefixOf` escaped -> True
          -- Skip the escaped character so that the second half of an
          -- escaped backslash is never read as the start of a new escape.
          | otherwise -> go (Text.drop 1 escaped)

0 comments on commit c7ac532

Please sign in to comment.