diff --git a/README.md b/README.md index 838a2055b..f794f734d 100644 --- a/README.md +++ b/README.md @@ -141,7 +141,7 @@ The following PostgreSQL column types are supported by ruby-pg (TE = Text Encode * Date: [TE](rdoc-ref:PG::TextEncoder::Date), [TD](rdoc-ref:PG::TextDecoder::Date), [BE](rdoc-ref:PG::BinaryEncoder::Date), [BD](rdoc-ref:PG::BinaryDecoder::Date) * JSON and JSONB: [TE](rdoc-ref:PG::TextEncoder::JSON), [TD](rdoc-ref:PG::TextDecoder::JSON) * Inet: [TE](rdoc-ref:PG::TextEncoder::Inet), [TD](rdoc-ref:PG::TextDecoder::Inet) -* Array: [TE](rdoc-ref:PG::TextEncoder::Array), [TD](rdoc-ref:PG::TextDecoder::Array) +* Array: [TE](rdoc-ref:PG::TextEncoder::Array), [TD](rdoc-ref:PG::TextDecoder::Array), [BE](rdoc-ref:PG::BinaryEncoder::Array), [BD](rdoc-ref:PG::BinaryDecoder::Array) * Composite Type (also called "Row" or "Record"): [TE](rdoc-ref:PG::TextEncoder::Record), [TD](rdoc-ref:PG::TextDecoder::Record) The following text and binary formats can also be encoded although they are not used as column type: diff --git a/ext/pg_binary_decoder.c b/ext/pg_binary_decoder.c index 432e42bf3..5006a3c72 100644 --- a/ext/pg_binary_decoder.c +++ b/ext/pg_binary_decoder.c @@ -133,6 +133,154 @@ pg_bin_dec_to_base64(t_pg_coder *conv, const char *val, int len, int tuple, int return out_value; } +/* + * Maximum number of array subscripts (arbitrary limit) + */ +#define MAXDIM 6 + +/* + * Document-class: PG::BinaryDecoder::Array < PG::CompositeDecoder + * + * This is a decoder class for conversion of binary array types. + * + * It returns an Array with possibly an arbitrary number of sub-Arrays. + * All values are decoded according to the #elements_type accessor. + * Sub-arrays are decoded recursively. + * + * This decoder simply ignores any dimension decorations preceding the array values. + * It returns all array values as regular ruby Array with a zero based index, regardless of the index given in the dimension decoration. 
+ * + * An array decoder which respects dimension decorations is waiting to be implemented. + * + */ +static VALUE +pg_bin_dec_array(t_pg_coder *conv, const char *input_line, int len, int tuple, int field, int enc_idx) +{ + t_pg_composite_coder *this = (t_pg_composite_coder *)conv; + t_pg_coder_dec_func dec_func = pg_coder_dec_func(this->elem, this->comp.format); + + /* Current field */ + VALUE field_str; + + int32_t nitems32; + int i; + int ndim; + int nitems; + int flags; + int dim; + int dim_sizes[MAXDIM]; + VALUE arrays[MAXDIM]; + char *output_ptr; + const char *cur_ptr; + const char *line_end_ptr; + char *end_capa_ptr; + + /* Allocate a new string with embedded capacity and realloc later with + * exponential growing size when needed. */ + PG_RB_STR_NEW( field_str, output_ptr, end_capa_ptr ); + + /* set pointer variables for loop */ + cur_ptr = input_line; + line_end_ptr = input_line + len; + + /* read number of dimensions */ + if (line_end_ptr - cur_ptr < 4 ) goto length_error; + ndim = read_nbo32(cur_ptr); + if (ndim < 0 || ndim > MAXDIM) { + rb_raise( rb_eArgError, "unsupported number of array dimensions: %d", ndim ); + } + cur_ptr += 4; + + /* read flags */ + if (line_end_ptr - cur_ptr < 4 ) goto length_error; + flags = read_nbo32(cur_ptr); + if (flags != 0 && flags != 1) { + rb_raise( rb_eArgError, "unsupported binary array flags: %d", flags ); + } + cur_ptr += 4; + + /* ignore element OID */ + if (line_end_ptr - cur_ptr < 4 ) goto length_error; + cur_ptr += 4; + + nitems32 = ndim == 0 ? 
0 : 1; + for (i = 0; i < ndim; i++) { + int64_t prod; + + /* read size of dimensions and ignore lower bound */ + if (line_end_ptr - cur_ptr < 8 ) goto length_error; + dim_sizes[i] = read_nbo32(cur_ptr); + prod = (int64_t) nitems32 * (int64_t) dim_sizes[i]; + nitems32 = (int32_t) prod; + if (dim_sizes[i] < 0 || (int64_t) nitems32 != prod) { + rb_raise( rb_eArgError, "unsupported array size: %" PRId64, prod ); + } + cur_ptr += 8; + } + nitems = (int)nitems32; + + dim = 0; + arrays[dim] = rb_ary_new2(ndim == 0 ? 0 : dim_sizes[dim]); + for (i = 0; i < nitems; i++) { + int input_len; + + /* traverse dimensions down */ + while (dim < ndim - 1) { + dim++; + arrays[dim] = rb_ary_new2(dim_sizes[dim]); + rb_ary_push(arrays[dim - 1], arrays[dim]); + } + + /* read element length */ + if (line_end_ptr - cur_ptr < 4 ) goto length_error; + input_len = read_nbo32(cur_ptr); + cur_ptr += 4; + + /* convert and put element into array */ + if (input_len < 0) { + if (input_len != -1) goto length_error; + /* NULL indicator */ + rb_ary_push(arrays[dim], Qnil); + } else { + VALUE field_value; + if (line_end_ptr - cur_ptr < input_len ) goto length_error; + + /* copy input data to field_str */ + PG_RB_STR_ENSURE_CAPA( field_str, input_len, output_ptr, end_capa_ptr ); + memcpy(output_ptr, cur_ptr, input_len); + cur_ptr += input_len; + output_ptr += input_len; + /* convert field_str through the type map */ + rb_str_set_len( field_str, output_ptr - RSTRING_PTR(field_str) ); + field_value = dec_func(this->elem, RSTRING_PTR(field_str), input_len, tuple, field, enc_idx); + + rb_ary_push(arrays[dim], field_value); + + if( field_value == field_str ){ + /* Our output string will be send to the user, so we can not reuse + * it for the next field. */ + PG_RB_STR_NEW( field_str, output_ptr, end_capa_ptr ); + } + } + + /* Reset the pointer to the start of the output/buffer string. 
*/ + output_ptr = RSTRING_PTR(field_str); + + /* traverse dimensions up */ + while (RARRAY_LEN(arrays[dim]) >= dim_sizes[dim] && dim > 0) { + dim--; + } + } + + if (cur_ptr < line_end_ptr) + rb_raise( rb_eArgError, "trailing data after binary array data at position: %ld", (long)(cur_ptr - input_line) + 1 ); + + return arrays[0]; + +length_error: + rb_raise( rb_eArgError, "premature end of binary array data at position: %ld", (long)(cur_ptr - input_line) + 1 ); +} + #define PG_INT64_MIN (-0x7FFFFFFFFFFFFFFFL - 1) #define PG_INT64_MAX 0x7FFFFFFFFFFFFFFFL @@ -305,6 +453,8 @@ init_pg_binary_decoder(void) /* dummy = rb_define_class_under( rb_mPG_BinaryDecoder, "Timestamp", rb_cPG_SimpleDecoder ); */ pg_define_coder( "Timestamp", pg_bin_dec_timestamp, rb_cPG_SimpleDecoder, rb_mPG_BinaryDecoder ); + /* dummy = rb_define_class_under( rb_mPG_BinaryDecoder, "Array", rb_cPG_CompositeDecoder ); */ + pg_define_coder( "Array", pg_bin_dec_array, rb_cPG_CompositeDecoder, rb_mPG_BinaryDecoder ); /* dummy = rb_define_class_under( rb_mPG_BinaryDecoder, "ToBase64", rb_cPG_CompositeDecoder ); */ pg_define_coder( "ToBase64", pg_bin_dec_to_base64, rb_cPG_CompositeDecoder, rb_mPG_BinaryDecoder ); } diff --git a/ext/pg_binary_encoder.c b/ext/pg_binary_encoder.c index 77c29e6c5..601bd333e 100644 --- a/ext/pg_binary_encoder.c +++ b/ext/pg_binary_encoder.c @@ -304,6 +304,201 @@ pg_bin_enc_date(t_pg_coder *this, VALUE value, char *out, VALUE *intermediate, i return 4; } +/* + * Maximum number of array subscripts (arbitrary limit) + */ +#define MAXDIM 6 + +/* + * Document-class: PG::BinaryEncoder::Array < PG::CompositeEncoder + * + * This is the encoder class for PostgreSQL array types in binary format. + * + * All values are encoded according to the #elements_type + * accessor. Sub-arrays are encoded recursively. + * + * This encoder expects an Array of values or sub-arrays as input. + * Other values are passed through as byte string without interpretation. 
+ * + * The accessors needs_quotation and delimiter are ignored for binary encoding. + * + */ +static int +pg_bin_enc_array(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate, int enc_idx) +{ + if (TYPE(value) == T_ARRAY) { + t_pg_composite_coder *this = (t_pg_composite_coder *)conv; + t_pg_coder_enc_func enc_func = pg_coder_enc_func(this->elem); + int dim_sizes[MAXDIM]; + int ndim = 1; + int nitems = 1; + VALUE el1 = value; + + if (RARRAY_LEN(value) == 0) { + nitems = 0; + ndim = 0; + dim_sizes[0] = 0; + } else { + /* Determine number of dimensions, sizes of dimensions and number of items */ + while(1) { + VALUE el2; + + dim_sizes[ndim-1] = RARRAY_LENINT(el1); + nitems *= dim_sizes[ndim-1]; + el2 = rb_ary_entry(el1, 0); + if (TYPE(el2) == T_ARRAY) { + ndim++; + if (ndim > MAXDIM) + rb_raise( rb_eArgError, "unsupported number of array dimensions: >%d", ndim ); + } else { + break; + } + el1 = el2; + } + } + + if(out){ + /* Second encoder pass -> write data to `out` */ + int dimpos[MAXDIM]; + VALUE arrays[MAXDIM]; + int dim = 0; + int item_idx = 0; + int i; + char *orig_out = out; + Oid elem_oid = this->elem ? 
this->elem->oid : 0; + + write_nbo32(ndim, out); out += 4; + write_nbo32(1 /* flags */, out); out += 4; + write_nbo32(elem_oid, out); out += 4; + for (i = 0; i < ndim; i++) { + dimpos[i] = 0; + write_nbo32(dim_sizes[i], out); out += 4; + write_nbo32(1 /* offset */, out); out += 4; + } + arrays[0] = value; + + while(1) { + /* traverse tree down */ + while (dim < ndim - 1) { + arrays[dim + 1] = rb_ary_entry(arrays[dim], dimpos[dim]); + dim++; + } + + for (i = 0; i < dim_sizes[dim]; i++) { + VALUE item = rb_ary_entry(arrays[dim], i); + + if (NIL_P(item)) { + write_nbo32(-1, out); out += 4; + } else { + /* Encoded string is returned in subint */ + int strlen; + VALUE is_one_pass = rb_ary_entry(*intermediate, item_idx++); + VALUE subint = rb_ary_entry(*intermediate, item_idx++); + + if (is_one_pass == Qtrue) { + strlen = RSTRING_LENINT(subint); + memcpy( out + 4, RSTRING_PTR(subint), strlen); + } else { + strlen = enc_func(this->elem, item, out + 4, &subint, enc_idx); + } + write_nbo32(strlen, out); + out += 4 /* length */ + strlen; + } + } + + /* traverse tree up and go to next sibling array */ + do { + if (dim > 0) { + dimpos[dim] = 0; + dim--; + dimpos[dim]++; + } else { + goto finished2; + } + } while (dimpos[dim] >= dim_sizes[dim]); + } + finished2: + return (int)(out - orig_out); + + } else { + /* First encoder pass -> determine required buffer space for `out` */ + + int dimpos[MAXDIM]; + VALUE arrays[MAXDIM]; + int dim = 0; + int item_idx = 0; + int i; + int size_sum = 0; + + *intermediate = rb_ary_new2(nitems); + + for (i = 0; i < MAXDIM; i++) { + dimpos[i] = 0; + } + arrays[0] = value; + + while(1) { + + /* traverse tree down */ + while (dim < ndim - 1) { + VALUE array = rb_ary_entry(arrays[dim], dimpos[dim]); + if (TYPE(array) != T_ARRAY) { + rb_raise( rb_eArgError, "expected Array instead of %+"PRIsVALUE" in dimension %d", array, dim + 1 ); + } + if (dim_sizes[dim + 1] != RARRAY_LEN(array)) { + rb_raise( rb_eArgError, "varying number of array elements (%d and 
%d) in dimension %d", dim_sizes[dim + 1], RARRAY_LENINT(array), dim + 1 ); + } + arrays[dim + 1] = array; + dim++; + } + + for (i = 0; i < dim_sizes[dim]; i++) { + VALUE item = rb_ary_entry(arrays[dim], i); + + if (NIL_P(item)) { + size_sum += 4 /* length bytes = -1 */; + } else { + VALUE subint; + int strlen = enc_func(this->elem, item, NULL, &subint, enc_idx); + + /* Gather all intermediate values of elements into an array, which is returned as intermediate for the array encoder */ + if( strlen == -1 ){ + /* Encoded string is returned in subint */ + rb_ary_store(*intermediate, item_idx++, Qtrue); + rb_ary_store(*intermediate, item_idx++, subint); + + strlen = RSTRING_LENINT(subint); + } else { + /* Two passes necessary */ + rb_ary_store(*intermediate, item_idx++, Qfalse); + rb_ary_store(*intermediate, item_idx++, subint); + } + size_sum += 4 /* length bytes */ + strlen; + } + } + + /* traverse tree up and go to next sibling array */ + do { + if (dim > 0) { + dimpos[dim] = 0; + dim--; + dimpos[dim]++; + } else { + goto finished1; + } + } while (dimpos[dim] >= dim_sizes[dim]); + } + finished1:; + + return 4 /* ndim */ + 4 /* flags */ + 4 /* oid */ + + ndim * (4 /* dim size */ + 4 /* dim offset */) + + size_sum; + } + } else { + return pg_coder_enc_to_s( conv, value, out, intermediate, enc_idx ); + } +} + /* * Document-class: PG::BinaryEncoder::FromBase64 < PG::CompositeEncoder * @@ -381,6 +576,8 @@ init_pg_binary_encoder(void) /* dummy = rb_define_class_under( rb_mPG_BinaryEncoder, "Date", rb_cPG_SimpleEncoder ); */ pg_define_coder( "Date", pg_bin_enc_date, rb_cPG_SimpleEncoder, rb_mPG_BinaryEncoder ); + /* dummy = rb_define_class_under( rb_mPG_BinaryEncoder, "Array", rb_cPG_CompositeEncoder ); */ + pg_define_coder( "Array", pg_bin_enc_array, rb_cPG_CompositeEncoder, rb_mPG_BinaryEncoder ); /* dummy = rb_define_class_under( rb_mPG_BinaryEncoder, "FromBase64", rb_cPG_CompositeEncoder ); */ pg_define_coder( "FromBase64", pg_bin_enc_from_base64, 
rb_cPG_CompositeEncoder, rb_mPG_BinaryEncoder ); } diff --git a/lib/pg/basic_type_registry.rb b/lib/pg/basic_type_registry.rb index 4a6daaa70..02c2cca2b 100644 --- a/lib/pg/basic_type_registry.rb +++ b/lib/pg/basic_type_registry.rb @@ -127,8 +127,8 @@ def initialize(connection, registry: nil) @maps = [ [0, :encoder, PG::TextEncoder::Array], [0, :decoder, PG::TextDecoder::Array], - [1, :encoder, nil], - [1, :decoder, nil], + [1, :encoder, PG::BinaryEncoder::Array], + [1, :decoder, PG::BinaryDecoder::Array], ].inject([]) do |h, (format, direction, arraycoder)| coders = registry.coders_for(format, direction) || {} h[format] ||= {} diff --git a/spec/pg/basic_type_map_based_on_result_spec.rb b/spec/pg/basic_type_map_based_on_result_spec.rb index e8c26e2f6..6e49d98e6 100644 --- a/spec/pg/basic_type_map_based_on_result_spec.rb +++ b/spec/pg/basic_type_map_based_on_result_spec.rb @@ -29,64 +29,46 @@ Ractor.make_shareable(basic_type_mapping) end - it "should be usable with Ractor in text format", :ractor do - vals = Ractor.new(@conninfo) do |conninfo| - conn = PG.connect(conninfo) - basic_type_mapping = PG::BasicTypeMapBasedOnResult.new(conn) - conn.exec( "CREATE TEMP TABLE copytable (t TEXT, i INT, ai INT[])" ) - - # Retrieve table OIDs per empty result set. 
- res = conn.exec( "SELECT * FROM copytable LIMIT 0" ) - tm = basic_type_mapping.build_column_map( res ) - row_encoder = PG::TextEncoder::CopyRow.new type_map: tm - - conn.copy_data( "COPY copytable FROM STDIN", row_encoder ) do |res| - conn.put_copy_data ['b', 234, [2,3]] - end - res = conn.exec( "SELECT * FROM copytable" ) - res.values - ensure - conn&.finish - end.take - - expect( vals ).to eq( [['b', '234', '{2,3}']] ) - end + [1, 0].each do |format| + it "should be usable with Ractor in format #{format}", :ractor do + vals = Ractor.new(@conninfo, format) do |conninfo, format| + conn = PG.connect(conninfo) + basic_type_mapping = PG::BasicTypeMapBasedOnResult.new(conn) + conn.exec( "CREATE TEMP TABLE copytable (t TEXT, i INT, ai INT[])" ) - it "should be usable with Ractor in binary format", :ractor do - vals = Ractor.new(@conninfo) do |conninfo| - conn = PG.connect(conninfo) - basic_type_mapping = PG::BasicTypeMapBasedOnResult.new(conn) - conn.exec( "CREATE TEMP TABLE copytable (t TEXT, i INT)" ) - - # Retrieve table OIDs per empty result set. - res = conn.exec( "SELECT * FROM copytable LIMIT 0", [], 1) - tm = basic_type_mapping.build_column_map( res ) - row_encoder = PG::BinaryEncoder::CopyRow.new type_map: tm + # Retrieve table OIDs per empty result set. + res = conn.exec( "SELECT * FROM copytable LIMIT 0", [], format ) + tm = basic_type_mapping.build_column_map( res ) + nsp = format==1 ? PG::BinaryEncoder : PG::TextEncoder + row_encoder = nsp::CopyRow.new type_map: tm - conn.copy_data( "COPY copytable FROM STDIN WITH (FORMAT binary)", row_encoder ) do |res| - conn.put_copy_data ['b', 234] - end - res = conn.exec( "SELECT * FROM copytable" ) - res.values - ensure - conn&.finish - end.take + conn.copy_data( "COPY copytable FROM STDIN WITH (FORMAT #{ format==1 ? 
"binary" : "text" })", row_encoder ) do |res| + conn.put_copy_data ['b', 234, [2,3]] + end + res = conn.exec( "SELECT * FROM copytable" ) + res.values + ensure + conn&.finish + end.take - expect( vals ).to eq( [['b', '234']] ) + expect( vals ).to eq( [['b', '234', '{2,3}']] ) + end end context "with usage of result oids for bind params encoder selection" do - it "can type cast query params" do - @conn.exec( "CREATE TEMP TABLE copytable (t TEXT, i INT, ai INT[], by BYTEA)" ) + [1, 0].each do |format| + it "can type cast query params to format #{format}" do + @conn.exec( "CREATE TEMP TABLE copytable (t TEXT, i INT, ai INT[], by BYTEA)" ) - # Retrieve table OIDs per empty result. - res = @conn.exec( "SELECT * FROM copytable LIMIT 0" ) - tm = basic_type_mapping.build_column_map( res ) + # Retrieve table OIDs per empty result. + res = @conn.exec( "SELECT * FROM copytable LIMIT 0", [], format ) + tm = basic_type_mapping.build_column_map( res ) - @conn.exec_params( "INSERT INTO copytable VALUES ($1, $2, $3, $4)", ['a', 123, [5,4,3], "\0\xFF'"], 0, tm ) - @conn.exec_params( "INSERT INTO copytable VALUES ($1, $2, $3, $4)", ['b', 234, [2,3], "\"\n\r"], 0, tm ) - res = @conn.exec( "SELECT * FROM copytable" ) - expect( res.values ).to eq( [['a', '123', '{5,4,3}', '\x00ff27'], ['b', '234', '{2,3}', '\x220a0d']] ) + @conn.exec_params( "INSERT INTO copytable VALUES ($1, $2, $3, $4)", ['a', 123, [5,4,3], "\0\xFF'"], 0, tm ) + @conn.exec_params( "INSERT INTO copytable VALUES ($1, $2, $3, $4)", ['b', 234, [2,3], "\"\n\r"], 0, tm ) + res = @conn.exec( "SELECT * FROM copytable" ) + expect( res.values ).to eq( [['a', '123', '{5,4,3}', '\x00ff27'], ['b', '234', '{2,3}', '\x220a0d']] ) + end end it "can do JSON conversions", :postgresql_94 do @@ -124,20 +106,23 @@ end context "with usage of result oids for copy encoder selection" do - it "can type cast #copy_data text input with encoder" do - @conn.exec( "CREATE TEMP TABLE copytable (t TEXT, i INT, ai INT[])" ) + [1, 0].each do |format| 
+ it "can type cast #copy_data text input with encoder to format #{format}" do + @conn.exec( "CREATE TEMP TABLE copytable (t TEXT, i INT, ai INT[])" ) - # Retrieve table OIDs per empty result set. - res = @conn.exec( "SELECT * FROM copytable LIMIT 0" ) - tm = basic_type_mapping.build_column_map( res ) - row_encoder = PG::TextEncoder::CopyRow.new type_map: tm + # Retrieve table OIDs per empty result set. + res = @conn.exec( "SELECT * FROM copytable LIMIT 0", [], format ) + tm = basic_type_mapping.build_column_map( res ) + nsp = format==1 ? PG::BinaryEncoder : PG::TextEncoder + row_encoder = nsp::CopyRow.new type_map: tm - @conn.copy_data( "COPY copytable FROM STDIN", row_encoder ) do |res| - @conn.put_copy_data ['a', 123, [5,4,3]] - @conn.put_copy_data ['b', 234, [2,3]] + @conn.copy_data( "COPY copytable FROM STDIN WITH (FORMAT #{ format==1 ? "binary" : "text" })", row_encoder ) do |res| + @conn.put_copy_data ['a', 123, [[5,4],[3,2]]] + @conn.put_copy_data ['b', 234, [2,3]] + end + res = @conn.exec( "SELECT * FROM copytable" ) + expect( res.values ).to eq( [['a', '123', '{{5,4},{3,2}}'], ['b', '234', '{2,3}']] ) end - res = @conn.exec( "SELECT * FROM copytable" ) - expect( res.values ).to eq( [['a', '123', '{5,4,3}'], ['b', '234', '{2,3}']] ) end [1, 0].each do |format| diff --git a/spec/pg/basic_type_map_for_results_spec.rb b/spec/pg/basic_type_map_for_results_spec.rb index 04e46abc8..327d943db 100644 --- a/spec/pg/basic_type_map_for_results_spec.rb +++ b/spec/pg/basic_type_map_for_results_spec.rb @@ -295,7 +295,7 @@ end end - [0].each do |format| + [0, 1].each do |format| it "should do format #{format} array type conversions" do res = @conn.exec_params( "SELECT CAST('{1,2,3}' AS INT2[]), CAST('{{1,2},{3,4}}' AS INT2[][]), CAST('{1,2,3}' AS INT4[]), @@ -397,44 +397,32 @@ end context "with usage of result oids for copy decoder selection" do - it "can type cast #copy_data text output with decoder" do - @conn.exec( "CREATE TEMP TABLE copytable (t TEXT, i INT, ai 
INT[])" ) - @conn.exec( "INSERT INTO copytable VALUES ('a', 123, '{5,4,3}'), ('b', 234, '{2,3}')" ) - - # Retrieve table OIDs per empty result. - res = @conn.exec( "SELECT * FROM copytable LIMIT 0" ) - tm = basic_type_mapping.build_column_map( res ) - row_decoder = PG::TextDecoder::CopyRow.new(type_map: tm).freeze - - rows = [] - @conn.copy_data( "COPY copytable TO STDOUT", row_decoder ) do |res| - while row=@conn.get_copy_data - rows << row + [0, 1].each do |format| + it "can type cast #copy_data output in format #{format} with decoder" do + @conn.exec( "CREATE TEMP TABLE copytable (t TEXT, i INT, ai INT[], b BYTEA, ts timestamp)" ) + @conn.exec( "INSERT INTO copytable VALUES ('a', 1234, '{{5,4},{3,2}}', '\\xff000a0d27', '2023-03-17 03:04:05.678912'), ('b', -444, '{2,3}', '\\x202078797a2020', '1990-12-17 15:14:45')" ) + + # Retrieve table OIDs per empty result. + res = @conn.exec( "SELECT * FROM copytable LIMIT 0", [], format ) + tm = basic_type_mapping.build_column_map( res ) + nsp = format==1 ? PG::BinaryDecoder : PG::TextDecoder + row_decoder = nsp::CopyRow.new(type_map: tm).freeze + + rows = [] + @conn.copy_data( "COPY copytable TO STDOUT WITH (FORMAT #{ format==1 ? "binary" : "text" })", row_decoder ) do |res| + while row=@conn.get_copy_data + rows << row + end end - end - expect( rows ).to eq( [['a', 123, [5,4,3]], ['b', 234, [2,3]]] ) - end - - it "can type cast #copy_data binary output with decoder" do - @conn.exec( "CREATE TEMP TABLE copytable (b BYTEA, i INT, ts timestamp)" ) - @conn.exec( "INSERT INTO copytable VALUES ('\\xff000a0d27', 1234, '2023-03-17 03:04:05.678912'), ('\\x202078797a2020', '-444', '1990-12-17 15:14:45')" ) - # Retrieve table OIDs per empty result. 
- res = @conn.exec_params( "SELECT * FROM copytable LIMIT 0", [], 1 ) - tm = basic_type_mapping.build_column_map( res ) - row_decoder = PG::BinaryDecoder::CopyRow.new(type_map: tm).freeze - - rows = [] - @conn.copy_data( "COPY copytable TO STDOUT WITH (FORMAT binary)", row_decoder ) do |res| - while row=@conn.get_copy_data - rows << row - end + expect( rows.map{|l| l[0,4] } ).to eq( [['a', 1234, [[5,4],[3,2]], "\xff\x00\n\r'".b], ['b', -444, [2,3], " xyz "]] ) + # For compatibility reason the timestamp in text format is encoded as local time (TimestampWithoutTimeZone) instead of UTC + tmeth = format == 1 ? :utc : :local + expect( rows[0][4] ). + to be_within(0.000001).of( Time.send(tmeth, 2023, 3, 17, 3, 4, 5.678912) ) + expect( rows[1][4] ). + to be_within(0.000001).of( Time.send(tmeth, 1990, 12, 17, 15, 14, 45) ) end - expect( rows.map{|l| l[0,2] } ).to eq( [["\xff\x00\n\r'".b, 1234], [" xyz ", -444]] ) - expect( rows[0][2] ). - to be_within(0.000001).of( Time.utc(2023, 3, 17, 3, 4, 5.678912) ) - expect( rows[1][2] ). 
- to be_within(0.000001).of( Time.utc(1990, 12, 17, 15, 14, 45) ) end end end diff --git a/spec/pg/type_spec.rb b/spec/pg/type_spec.rb index 4d64adef5..1393f7793 100644 --- a/spec/pg/type_spec.rb +++ b/spec/pg/type_spec.rb @@ -630,6 +630,9 @@ def expect_deprecated_coder_init let!(:textenc_string_array_with_delimiter) { PG::TextEncoder::Array.new elements_type: textenc_string, delimiter: ';' } let!(:textdec_string_array_with_delimiter) { PG::TextDecoder::Array.new elements_type: textdec_string, delimiter: ';' } let!(:textdec_bytea_array) { PG::TextDecoder::Array.new elements_type: textdec_bytea } + let!(:binarydec_array) { PG::BinaryDecoder::Array.new } + let!(:binarydec_int_array) { PG::BinaryDecoder::Array.new elements_type: PG::BinaryDecoder::Integer.new } + let!(:binaryenc_array) { PG::BinaryEncoder::Array.new } # # Array parser specs are thankfully borrowed from here: @@ -638,9 +641,15 @@ def expect_deprecated_coder_init describe '#decode' do context 'one dimensional arrays' do context 'empty' do - it 'returns an empty array' do + it 'returns an empty array from text' do expect( textdec_string_array.decode(%[{}]) ).to eq( [] ) end + + it 'returns an empty array from binary' do + # binary '{}'::TEXT[] + b = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x19" + expect( binarydec_array.decode(b) ).to eq( [] ) + end end context 'no strings' do @@ -714,7 +723,7 @@ def expect_deprecated_coder_init expect( textdec_string_array.decode(%({1,2,3}x)) ).to eq(['1','2','3']) expect( textdec_string_array.decode(%({{1,2},{2,3})) ).to eq([['1','2'],['2','3']]) expect( textdec_string_array.decode(%({{1,2},{2,3}}x)) ).to eq([['1','2'],['2','3']]) - expect( textdec_string_array.decode(%({[1,2},{2,3}}})) ).to eq(['[1','2']) + expect( textdec_string_array.decode(%({[1,2},{2,3}}})) ).to eq(["[1",'2']) end end @@ -766,11 +775,11 @@ def expect_deprecated_coder_init it 'returns an array of strings with a sub array and a quoted }' do expect( 
textdec_string_array.decode(%[{1,{"2,}3",NULL},4}]) ).to eq( ['1',['2,}3',nil],'4'] ) end - it 'returns an array of strings with a sub array and a quoted {' do - expect( textdec_string_array.decode(%[{1,{"2,{3"},4}]) ).to eq( ['1',['2,{3'],'4'] ) + it "returns an array of strings with a sub array and a quoted {" do + expect( textdec_string_array.decode(%[{1,{"2,{3"},4}]) ).to eq( ['1',["2,{3"],'4'] ) end - it 'returns an array of strings with a sub array and a quoted { and escaped quote' do - expect( textdec_string_array.decode(%[{1,{"2\\",{3"},4}]) ).to eq( ['1',['2",{3'],'4'] ) + it "returns an array of strings with a sub array and a quoted { and escaped quote" do + expect( textdec_string_array.decode(%[{1,{"2\\",{3"},4}]) ).to eq( ['1',["2\",{3"],'4'] ) end it 'returns an array of strings with a sub array with empty strings' do expect( textdec_string_array.decode(%[{1,{""},4,{""}}]) ).to eq( ['1',[''],'4',['']] ) @@ -793,6 +802,94 @@ def expect_deprecated_coder_init it 'returns an array of strings with sub arrays' do expect( textdec_string_array.decode(%[{1,{2,{3,4}},{NULL,6},7}]) ).to eq( ['1',['2',['3','4']],[nil,'6'],'7'] ) end + + # '[-1:1][-2:-2][-3:-2]={{{5,6}},{{6,7}},{{NULL,5}}}'::INT[] + let!(:bin_int_array_data) do + [ "00000003" + "00000001" + "00000017" + + "00000003" + "ffffffff" + + "00000001" + "fffffffe" + + "00000002" + "fffffffd" + + "00000004" + "00000005" + + "00000004" + "00000006" + + "00000004" + "00000006" + + "00000004" + "00000007" + + "ffffffff" + + "00000004" + "00000005" + ].pack("H*") + end + + # '[-1:1][-2:-2][-3:-2]={{{5,6"}},{{6,7}},{{5,NULL}}}'::TEXT[] + let!(:bin_text_array_data) do + [ "00000003" + "00000001" + "00000019" + + "00000003" + "ffffffff" + + "00000001" + "fffffffe" + + "00000002" + "fffffffd" + + "00000001" + "35" + + "00000001" + "36" + + "00000002" + "3622" + + "00000001" + "37" + + "ffffffff" + + "00000001" + "35" + ].pack("H*") + end + + it 'can decode binary int[]' do + expect( 
binarydec_int_array.decode(bin_int_array_data) ).to eq( [[[5, 6]], [[6, 7]], [[nil, 5]]] ) + end + it 'can decode binary text[]' do + expect( binarydec_array.decode(bin_text_array_data) ).to eq( [[["5", "6"]], [["6\"", "7"]], [[nil, "5"]]] ) + end + it 'can decode binary text[] with 6 dimensions' do + d = ["00000006" + "00000001" + "00000019" + + "00000001" + "ffffffff" + + "00000001" + "fffffffe" + + "00000001" + "fffffffd" + + "00000001" + "ffffffff" + + "00000001" + "fffffffe" + + "00000001" + "fffffffd" + + "ffffffff" + ].pack("H*") + expect( binarydec_array.decode(d) ).to eq( [[[[[[nil]]]]]] ) + end + it 'raises error when binary array is incomplete' do + (0 ... bin_int_array_data.bytesize).each do |i| + expect do + binarydec_int_array.decode(bin_int_array_data[0, i]) + end.to raise_error(ArgumentError, /premature/) + end + end + it 'raises error when binary array has additional bytes' do + expect do + binarydec_int_array.decode(bin_int_array_data + "\0") + end.to raise_error(ArgumentError, /trailing/) + end + it 'raises error when binary array has too many dimensions' do + d = ["00000007" + "00000001" + "00000019"].pack("H*") + expect do + binarydec_int_array.decode(d) + end.to raise_error(ArgumentError, /dimensions/) + end + it 'raises error when binary array has invalid dimensions' do + d = ["ffffffff" + "00000001" + "00000019"].pack("H*") + expect do + binarydec_int_array.decode(d) + end.to raise_error(ArgumentError, /dimensions/) + end + it 'raises error when binary array has invalid flags' do + d = ["00000000" + "00000002" + "00000019"].pack("H*") + expect do + binarydec_int_array.decode(d) + end.to raise_error(ArgumentError, /flags/) + end + it 'raises error when binary array has too large array size' do + d = ["00000002" + "00000000" + "00000019" + + "00010000" + "ffffffff" + + "00010000" + "fffffffe" + ].pack("H*") + expect do + binarydec_int_array.decode(d) + end.to raise_error(ArgumentError, /array size/) + end end it 'should decode array of types with 
decoder in ruby space' do @@ -823,13 +920,51 @@ def expect_deprecated_coder_init it 'encodes an array of float8 with sub arrays' do expect( textenc_float_array.encode([1000.11,[-0.00000221,[3.31,-441]],[nil,6.61],-7.71]) ).to match(Regexp.new(%[^{1000.1*,{-2.2*e-*6,{3.3*,-441.0}},{NULL,6.6*},-7.7*}$].gsub(/([\.\+\{\}\,])/, "\\\\\\1").gsub(/\*/, "\\d*"))) end + + let!(:binaryenc_int4_array) { PG::BinaryEncoder::Array.new elements_type: PG::BinaryEncoder::Int4.new(oid: 0x17) } + + it 'encodes an array of int4 with sub arrays' do + exp = ["00000003" + "00000001" + "00000017" + + "00000003" + "00000001" + + "00000001" + "00000001" + + "00000002" + "00000001" + + "00000004" + "00000005" + + "00000004" + "00000006" + + "00000004" + "00000006" + + "00000004" + "00000007" + + "ffffffff" + + "00000004" + "00000005" + ].pack("H*") + + expect( binaryenc_int4_array.encode([[[5,6]],[[6,7]],[[nil,5]]]) ).to eq( exp ) + end + + let!(:binaryenc_text_array) { PG::BinaryEncoder::Array.new elements_type: PG::BinaryEncoder::String.new(oid: 0x19) } + + it 'encodes an array of text with sub arrays' do + exp =["00000003" + "00000001" + "00000019" + + "00000003" + "00000001" + + "00000001" + "00000001" + + "00000002" + "00000001" + + "00000001" + "35" + + "00000001" + "36" + + "00000002" + "3622" + + "00000001" + "37" + + "ffffffff" + + "00000001" + "35" + ].pack("H*") + + expect( binaryenc_text_array.encode([[[5,6]],[["6\"",7]],[[nil,5]]]) ).to eq( exp ) + end end + context 'two dimensional arrays' do it 'encodes an array of timestamps with sub arrays' do expect( textenc_timestamp_array.encode([Time.new(2014,12,31),[nil, Time.new(2016,01,02, 23, 23, 59.99)]]) ). 
to eq( %[{2014-12-31 00:00:00.000000000,{NULL,2016-01-02 23:23:59.990000000}}] ) end end + context 'one dimensional array' do it 'can encode empty arrays' do expect( textenc_int_array.encode([]) ).to eq( '{}' ) @@ -841,6 +976,44 @@ def expect_deprecated_coder_init it 'respects a different delimiter' do expect( textenc_string_array_with_delimiter.encode(['a','b,','c']) ).to eq( '{a;b,;c}' ) end + + it 'encodes an array' do + exp =["00000001" + "00000001" + "00000000" + + "00000002" + "00000001" + + "ffffffff" + + "00000002" + "3622" + ].pack("H*") + + expect( binaryenc_array.encode([nil, "6\""]) ).to eq( exp ) + end + end + + context 'other dimensional array' do + it 'encodes an empty array as zero dimensions' do + exp =["00000000" + "00000001" + "00000000"].pack("H*") + expect( binaryenc_array.encode([]) ).to eq( exp ) + end + it 'encodes a 6 dimensional array' do + exp =["00000006" + "00000001" + "00000000" + + "00000001" + "00000001" + + "00000001" + "00000001" + + "00000001" + "00000001" + + "00000001" + "00000001" + + "00000001" + "00000001" + + "00000001" + "00000001" + + "ffffffff" + ].pack("H*") + expect( binaryenc_array.encode([[[[[[nil]]]]]]) ).to eq( exp ) + end + it 'raises an error on too many dimensions' do + expect{ binaryenc_array.encode([[[[[[[nil]]]]]]]) }.to raise_error( ArgumentError, /number of array dimensions/) + end + it 'raises an error on changed dimensions' do + expect{ binaryenc_array.encode([[1], 2]) }.to raise_error( ArgumentError, /Array instead of 2 /) + end + it 'raises an error on varying array sizes' do + expect{ binaryenc_array.encode([[1], [2,3]]) }.to raise_error( ArgumentError, /varying number /) + end end context 'array of types with encoder in ruby space' do