Permalink
Browse files

Added formatCopyRow

  • Loading branch information...
1 parent a2c0a3f commit f9ffdf109f5a87f5981dddd13f84adae521c5b1b @joeyadams committed Mar 8, 2012
Showing with 176 additions and 0 deletions.
  1. +79 −0 Database/PostgreSQL/LibPQ.hsc
  2. +94 −0 cbits/escape-copy.c
  3. +3 −0 postgresql-libpq.cabal
@@ -143,6 +143,9 @@ module Database.PostgreSQL.LibPQ
, escapeByteaConn
, unescapeBytea
+ -- * Using COPY FROM
+ , formatCopyRow
+
-- * Asynchronous Command Processing
-- $asynccommand
, sendQuery
@@ -1506,6 +1509,67 @@ unescapeBytea bs =
return $ Just $ B.fromForeignPtr tofp 0 $ fromIntegral l
+-- | Render one row of data in the form expected by a @COPY ... FROM@
+-- statement.  The returned string ends with a newline.
+--
+-- Each list element is one column: 'Nothing' is written as the null
+-- string, and 'Just' carries the column's bytes together with the 'Format'
+-- that selects how they are escaped.
+--
+-- The output is in COPY's text format (not BINARY or CSV), using the
+-- default delimiter (tab) and the default null string (\\N).  A suitable
+-- query looks like:
+--
+-- >COPY tablename (id, col1, col2) FROM stdin;
+formatCopyRow :: [Maybe (B.ByteString, Format)] -> IO B.ByteString
+formatCopyRow params =
+    withFormatCopyRow params $ \row ->
+        B.packCStringLen row
+
+-- | Format a row into a temporary buffer and hand the formatted bytes to
+-- @inner@.  The buffer comes from 'allocaBytes', so the 'CStringLen' must
+-- not be used after @inner@ returns.
+withFormatCopyRow :: [Maybe (B.ByteString, Format)]
+                  -> (CStringLen -> IO a)
+                  -> IO a
+withFormatCopyRow params inner =
+    allocaBytes capacity $ \buf -> do
+        end <- emitParams buf params
+        let written = end `minusPtr` buf
+        -- Sanity check: 'paramSize' is meant to be an upper bound, so more
+        -- bytes written than allocated indicates a bug in the size estimate.
+        if written > capacity
+            then error $ "formatCopyRow: Buffer overrun (buffer is "
+                      ++ show capacity ++ " bytes, but "
+                      ++ show written ++ " bytes were written into it)"
+            else inner (castPtr buf, written)
+  where
+    -- An empty row still needs one byte for the terminating newline.
+    capacity
+        | null params = 1
+        | otherwise   = sum (map paramSize params)
+
+-- | Upper bound on the number of bytes one escaped datum occupies in the
+-- output, counting the tab or newline character that follows it.
+paramSize :: Maybe (B.ByteString, Format) -> Int
+paramSize param = payload + 1 -- one extra byte for the trailing tab/newline
+  where
+    payload = case param of
+        Nothing          -> 2              -- the null string "\\N"
+        Just (s, Text)   -> B.length s * 2 -- every byte may become a 2-byte escape
+        Just (s, Binary) -> B.length s * 5 -- every byte may become a 5-byte escape
+
+-- | Write one escaped datum at @out@ and return a pointer just past the
+-- last byte written.  No delimiter is written after the datum.
+--
+-- * 'Nothing' is written as the two-byte null string (\\N).
+--
+-- * 'Text' data is escaped by @c_escape_copy_text@.
+--
+-- * 'Binary' data is escaped by @c_escape_copy_bytea@.
+--
+-- The C escape routines take the input length as a 'CInt'.  A string whose
+-- length does not fit in a 'CInt' would have its length silently wrapped by
+-- the conversion (producing a truncated or empty datum), so such input is
+-- rejected with an 'IOError' instead.
+emitParam :: Ptr CUChar -> Maybe (B.ByteString, Format) -> IO (Ptr CUChar)
+emitParam out param = case param of
+    Nothing -> do
+        pokeElemOff out 0 92 -- '\\'
+        pokeElemOff out 1 78 -- 'N'
+        return (out `plusPtr` 2)
+    Just (s, Text)   -> escapeWith c_escape_copy_text s
+    Just (s, Binary) -> escapeWith c_escape_copy_bytea s
+  where
+    -- Run one of the C escape routines over the bytes of @s@, writing at @out@.
+    escapeWith :: (Ptr CUChar -> CInt -> Ptr CUChar -> IO (Ptr CUChar))
+               -> B.ByteString
+               -> IO (Ptr CUChar)
+    escapeWith escape s =
+        B.unsafeUseAsCStringLen s $ \(ptr, len) ->
+            if len > fromIntegral (maxBound :: CInt)
+                then ioError $ userError $
+                        "formatCopyRow: datum of " ++ show len
+                        ++ " bytes is too long to escape"
+                else escape (castPtr ptr) (fromIntegral len) out
+
+-- | Write every datum of a row starting at @out@, putting a tab between
+-- consecutive data and a newline after the last one.  An empty row is
+-- written as a lone newline.  Returns a pointer just past the newline.
+emitParams :: Ptr CUChar -> [Maybe (B.ByteString, Format)] -> IO (Ptr CUChar)
+emitParams out params = case params of
+    []         -> finishRow out
+    (p : rest) -> do
+        next <- emitParam out p
+        case rest of
+            [] -> finishRow next
+            _  -> do
+                poke next 9 -- tab
+                emitParams (next `plusPtr` 1) rest
+  where
+    -- Terminate the row and step past the terminator.
+    finishRow :: Ptr CUChar -> IO (Ptr CUChar)
+    finishRow p = do
+        poke p 10 -- newline
+        return (p `plusPtr` 1)
+
+
-- $asynccommand
-- The 'exec' function is adequate for submitting commands in normal,
-- synchronous applications. It has a couple of deficiencies, however,
@@ -2503,3 +2567,18 @@ foreign import ccall "libpq-fs.h lo_close"
foreign import ccall "libpq-fs.h lo_unlink"
c_lo_unlink :: Ptr PGconn -> Oid -> IO CInt
+
+------------------------------------------------------------------------
+-- cbits imports
+
+-- | Binding to @c_escape_copy_text@ from @cbits/escape-copy.c@.
+foreign import ccall unsafe "c_escape_copy_text"
+    c_escape_copy_text
+        :: Ptr CUChar      -- ^ @const unsigned char *in@: bytes to escape
+        -> CInt            -- ^ @int in_size@: number of input bytes
+        -> Ptr CUChar      -- ^ @unsigned char *out@: destination buffer
+        -> IO (Ptr CUChar) -- ^ Pointer to the end of the written data
+
+-- | Binding to @c_escape_copy_bytea@ from @cbits/escape-copy.c@.
+foreign import ccall unsafe "c_escape_copy_bytea"
+    c_escape_copy_bytea
+        :: Ptr CUChar      -- ^ @const unsigned char *in@: bytes to escape
+        -> CInt            -- ^ @int in_size@: number of input bytes
+        -> Ptr CUChar      -- ^ @unsigned char *out@: destination buffer
+        -> IO (Ptr CUChar) -- ^ Pointer to the end of the written data
View
@@ -0,0 +1,94 @@
+/*
+ * Escape one datum for the text format of COPY FROM.
+ *
+ * Tab, newline, carriage return and backslash are each written as a
+ * backslash followed by 't', 'n', 'r' or a second backslash respectively;
+ * every other byte is copied through unchanged.  An input byte therefore
+ * produces at most two output bytes, so the buffer pointed to by @out
+ * should be at least 2*in_size bytes long.
+ *
+ * Return a pointer to the end of the bytes emitted.
+ */
+unsigned char *c_escape_copy_text(const unsigned char *in, int in_size, unsigned char *out)
+{
+    int i;
+
+    for (i = 0; i < in_size; i++) {
+        unsigned char c = in[i];
+        unsigned char letter;
+
+        if (c == '\t') {
+            letter = 't';
+        } else if (c == '\n') {
+            letter = 'n';
+        } else if (c == '\r') {
+            letter = 'r';
+        } else if (c == '\\') {
+            letter = '\\';
+        } else {
+            /* Nothing to escape: copy the byte as-is. */
+            *out++ = c;
+            continue;
+        }
+
+        *out++ = '\\';
+        *out++ = letter;
+    }
+
+    return out;
+}
+
+/*
+ * Like c_escape_copy_text, but escape the datum so it will be suitable for
+ * PostgreSQL's BYTEA input function.  The hex format introduced by
+ * PostgreSQL 9.0 is deliberately not used, because only PostgreSQL 9.0 and
+ * up can read it.
+ *
+ * Two layers of escaping are applied:
+ *
+ *  * The raw binary data is converted to the format accepted by
+ *    PostgreSQL's BYTEA input function.
+ *
+ *  * That result is escaped again for use in COPY FROM data.
+ *
+ * An input byte produces at most five output bytes, so the buffer pointed
+ * to by @out should be at least 5*in_size bytes long.
+ *
+ * Return a pointer to the end of the bytes emitted.
+ */
+unsigned char *c_escape_copy_bytea(const unsigned char *in, int in_size, unsigned char *out)
+{
+    int i;
+
+    for (i = 0; i < in_size; i++) {
+        unsigned char c = in[i];
+
+        if (c < 32 || c > 126) {
+            /*
+             * Non-printable byte: write it in octal format, i.e. two
+             * backslashes (one backslash, escaped for COPY FROM) followed
+             * by three digits [0-7].
+             *
+             * The letter escapes \t, \n, \r are not an option because the
+             * BYTEA input function doesn't understand them.  A single
+             * backslash would make BYTEA see the literal octets 9, 10 and
+             * 13, but every other non-printable byte is escaped for BYTEA
+             * anyway, so those three get no special treatment.
+             */
+            *out++ = '\\';
+            *out++ = '\\';
+            *out++ = '0' + (c / 64) % 8;
+            *out++ = '0' + (c / 8) % 8;
+            *out++ = '0' + c % 8;
+        } else if (c == '\\') {
+            /* Backslash is escaped twice: once for BYTEA, again for COPY FROM. */
+            *out++ = '\\';
+            *out++ = '\\';
+            *out++ = '\\';
+            *out++ = '\\';
+        } else {
+            /*
+             * Printable characters other than backslash need neither BYTEA
+             * escaping nor COPY FROM escaping.
+             */
+            *out++ = c;
+        }
+    }
+
+    return out;
+}
View
@@ -23,6 +23,9 @@ Cabal-version: >=1.8
Library
Exposed-modules: Database.PostgreSQL.LibPQ
+ C-Sources:
+ cbits/escape-copy.c
+
Build-depends: base >= 4 && < 5
, bytestring

3 comments on commit f9ffdf1

Honestly, I'm wondering if this belongs in postgresql-libpq. Maybe it does, maybe it doesn't. I'll have to think about it.

Also, why aren't you using the escapeByteaConn function already provided by libpq?

@lpsmith do you think it should be rewritten using the escapeByteaConn function?

Not necessarily, I haven't looked at this issue in a long time. You'd have to determine if escapeByteaConn is in fact a correct thing to do here; and Joey is a highly conscientious programmer so he probably did have a good reason for doing what he did.

Please sign in to comment.