Skip to content
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ The following PostgreSQL column types are supported by ruby-pg (TE = Text Encode
* Date: [TE](rdoc-ref:PG::TextEncoder::Date), [TD](rdoc-ref:PG::TextDecoder::Date), [BE](rdoc-ref:PG::BinaryEncoder::Date), [BD](rdoc-ref:PG::BinaryDecoder::Date)
* JSON and JSONB: [TE](rdoc-ref:PG::TextEncoder::JSON), [TD](rdoc-ref:PG::TextDecoder::JSON)
* Inet: [TE](rdoc-ref:PG::TextEncoder::Inet), [TD](rdoc-ref:PG::TextDecoder::Inet)
* Array: [TE](rdoc-ref:PG::TextEncoder::Array), [TD](rdoc-ref:PG::TextDecoder::Array)
* Array: [TE](rdoc-ref:PG::TextEncoder::Array), [TD](rdoc-ref:PG::TextDecoder::Array), [BE](rdoc-ref:PG::BinaryEncoder::Array), [BD](rdoc-ref:PG::BinaryDecoder::Array)
* Composite Type (also called "Row" or "Record"): [TE](rdoc-ref:PG::TextEncoder::Record), [TD](rdoc-ref:PG::TextDecoder::Record)

The following text and binary formats can also be encoded although they are not used as column type:
Expand Down
150 changes: 150 additions & 0 deletions ext/pg_binary_decoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,154 @@ pg_bin_dec_to_base64(t_pg_coder *conv, const char *val, int len, int tuple, int
return out_value;
}

/*
 * Maximum number of array subscripts (arbitrary limit)
 *
 * The binary array decoder rejects input that announces more dimensions
 * than this with an ArgumentError. The value also sizes the per-dimension
 * bookkeeping arrays the decoder keeps on the stack.
 */
#define MAXDIM 6

/*
* Document-class: PG::BinaryDecoder::Array < PG::CompositeDecoder
*
* This is a decoder class for conversion of binary array types.
*
* It returns an Array with possibly an arbitrary number of sub-Arrays.
* All values are decoded according to the #elements_type accessor.
* Sub-arrays are decoded recursively.
*
* This decoder simply ignores any dimension decorations preceding the array values.
* It returns all array values as regular ruby Array with a zero based index, regardless of the index given in the dimension decoration.
*
* An array decoder which respects dimension decorations is waiting to be implemented.
*
*/
/*
 * Decode a PostgreSQL array given in binary format into nested Ruby Arrays.
 *
 * The input is read as a sequence of 32 bit integers (via read_nbo32):
 *   - number of dimensions (0..MAXDIM)
 *   - flags (only 0 and 1 are accepted)
 *   - element OID (skipped)
 *   - per dimension: size, followed by a lower bound that is skipped
 *   - per element: length (-1 marks NULL) followed by that many data bytes
 *
 * conv       - the array coder; its element coder decodes every non-NULL value
 * input_line - start of the binary array data
 * len        - number of bytes available at input_line
 * tuple, field, enc_idx - handed through unchanged to the element decoder
 *
 * Returns the outermost Ruby Array. NULL elements become nil.
 * Raises ArgumentError on an unsupported header, on truncated input and on
 * trailing bytes after the last element.
 */
static VALUE
pg_bin_dec_array(t_pg_coder *conv, const char *input_line, int len, int tuple, int field, int enc_idx)
{
	t_pg_composite_coder *this = (t_pg_composite_coder *)conv;
	t_pg_coder_dec_func dec_func = pg_coder_dec_func(this->elem, this->comp.format);

	/* Current field */
	VALUE field_str;

	int32_t nitems32;
	int i;
	int ndim;
	int nitems;
	int flags;
	int dim;
	int dim_sizes[MAXDIM];
	VALUE arrays[MAXDIM];
	long capa_limit;
	char *output_ptr;
	const char *cur_ptr;
	const char *line_end_ptr;
	char *end_capa_ptr;

	/* Allocate a new string with embedded capacity and realloc later with
	 * exponential growing size when needed. */
	PG_RB_STR_NEW( field_str, output_ptr, end_capa_ptr );

	/* set pointer variables for loop */
	cur_ptr = input_line;
	line_end_ptr = input_line + len;

	/* read number of dimensions */
	if (line_end_ptr - cur_ptr < 4 ) goto length_error;
	ndim = read_nbo32(cur_ptr);
	if (ndim < 0 || ndim > MAXDIM) {
		rb_raise( rb_eArgError, "unsupported number of array dimensions: %d", ndim );
	}
	cur_ptr += 4;

	/* read flags */
	if (line_end_ptr - cur_ptr < 4 ) goto length_error;
	flags = read_nbo32(cur_ptr);
	if (flags != 0 && flags != 1) {
		rb_raise( rb_eArgError, "unsupported binary array flags: %d", flags );
	}
	cur_ptr += 4;

	/* ignore element OID */
	if (line_end_ptr - cur_ptr < 4 ) goto length_error;
	cur_ptr += 4;

	/* The total number of elements is the product of all dimension sizes.
	 * It is computed in 64 bit so that an overflow of the 32 bit counter
	 * can be detected. */
	nitems32 = ndim == 0 ? 0 : 1;
	for (i = 0; i < ndim; i++) {
		int64_t prod;

		/* read size of dimensions and ignore lower bound */
		if (line_end_ptr - cur_ptr < 8 ) goto length_error;
		dim_sizes[i] = read_nbo32(cur_ptr);
		prod = (int64_t) nitems32 * (int64_t) dim_sizes[i];
		nitems32 = (int32_t) prod;
		if (dim_sizes[i] < 0 || (int64_t) nitems32 != prod) {
			rb_raise( rb_eArgError, "unsupported array size: %" PRId64, prod );
		}
		cur_ptr += 8;
	}
	nitems = (int)nitems32;

	/* The dimension sizes are not validated against the input length yet, so
	 * they must not be used directly as allocation size. Every element takes
	 * at least its 4 byte length field and every (sub-)array that gets filled
	 * holds at least one element, so no array can receive more entries than
	 * this limit. It only bounds the capacity hint - arrays still grow as
	 * needed. */
	capa_limit = (long)((line_end_ptr - cur_ptr) / 4);
	if (capa_limit > nitems) capa_limit = nitems;

	dim = 0;
	arrays[dim] = rb_ary_new2(ndim == 0 ? 0 : (dim_sizes[dim] < capa_limit ? dim_sizes[dim] : capa_limit));
	for (i = 0; i < nitems; i++) {
		int input_len;

		/* traverse dimensions down */
		while (dim < ndim - 1) {
			dim++;
			arrays[dim] = rb_ary_new2(dim_sizes[dim] < capa_limit ? dim_sizes[dim] : capa_limit);
			rb_ary_push(arrays[dim - 1], arrays[dim]);
		}

		/* read element length */
		if (line_end_ptr - cur_ptr < 4 ) goto length_error;
		input_len = read_nbo32(cur_ptr);
		cur_ptr += 4;

		/* convert and put element into array */
		if (input_len < 0) {
			if (input_len != -1) goto length_error;
			/* NULL indicator */
			rb_ary_push(arrays[dim], Qnil);
		} else {
			VALUE field_value;
			if (line_end_ptr - cur_ptr < input_len ) goto length_error;

			/* copy input data to field_str */
			PG_RB_STR_ENSURE_CAPA( field_str, input_len, output_ptr, end_capa_ptr );
			memcpy(output_ptr, cur_ptr, input_len);
			cur_ptr += input_len;
			output_ptr += input_len;
			/* convert field_str through the type map */
			rb_str_set_len( field_str, output_ptr - RSTRING_PTR(field_str) );
			field_value = dec_func(this->elem, RSTRING_PTR(field_str), input_len, tuple, field, enc_idx);

			rb_ary_push(arrays[dim], field_value);

			if( field_value == field_str ){
				/* Our output string will be send to the user, so we can not reuse
				 * it for the next field. */
				PG_RB_STR_NEW( field_str, output_ptr, end_capa_ptr );
			}
		}

		/* Reset the pointer to the start of the output/buffer string. */
		output_ptr = RSTRING_PTR(field_str);

		/* traverse dimensions up: leave every array that is completely filled */
		while (RARRAY_LEN(arrays[dim]) >= dim_sizes[dim] && dim > 0) {
			dim--;
		}
	}

	if (cur_ptr < line_end_ptr)
		rb_raise( rb_eArgError, "trailing data after binary array data at position: %ld", (long)(cur_ptr - input_line) + 1 );

	return arrays[0];

length_error:
	/* cur_ptr points to where more data was expected; report it 1-based */
	rb_raise( rb_eArgError, "premature end of binary array data at position: %ld", (long)(cur_ptr - input_line) + 1 );
}

/* Smallest and largest value of a signed 64 bit integer.
 * The minimum is spelled as (-MAX - 1), because its magnitude does not fit
 * into a signed 64 bit literal. */
#define PG_INT64_MIN (-0x7FFFFFFFFFFFFFFFL - 1)
#define PG_INT64_MAX 0x7FFFFFFFFFFFFFFFL

Expand Down Expand Up @@ -305,6 +453,8 @@ init_pg_binary_decoder(void)
/* dummy = rb_define_class_under( rb_mPG_BinaryDecoder, "Timestamp", rb_cPG_SimpleDecoder ); */
pg_define_coder( "Timestamp", pg_bin_dec_timestamp, rb_cPG_SimpleDecoder, rb_mPG_BinaryDecoder );

/* dummy = rb_define_class_under( rb_mPG_BinaryDecoder, "Array", rb_cPG_CompositeDecoder ); */
pg_define_coder( "Array", pg_bin_dec_array, rb_cPG_CompositeDecoder, rb_mPG_BinaryDecoder );
/* dummy = rb_define_class_under( rb_mPG_BinaryDecoder, "ToBase64", rb_cPG_CompositeDecoder ); */
pg_define_coder( "ToBase64", pg_bin_dec_to_base64, rb_cPG_CompositeDecoder, rb_mPG_BinaryDecoder );
}
197 changes: 197 additions & 0 deletions ext/pg_binary_encoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,201 @@ pg_bin_enc_date(t_pg_coder *this, VALUE value, char *out, VALUE *intermediate, i
return 4;
}

/*
 * Maximum number of array subscripts (arbitrary limit)
 *
 * The binary array encoder raises an ArgumentError for Ruby Arrays that are
 * nested deeper than this. The value also sizes the per-dimension
 * bookkeeping arrays the encoder keeps on the stack.
 */
#define MAXDIM 6

/*
* Document-class: PG::BinaryEncoder::Array < PG::CompositeEncoder
*
* This is the encoder class for PostgreSQL array types in binary format.
*
* All values are encoded according to the #elements_type
* accessor. Sub-arrays are encoded recursively.
*
* This encoder expects an Array of values or sub-arrays as input.
* Other values are passed through as byte string without interpretation.
*
* The accessors needs_quotation and delimiter are ignored for binary encoding.
*
*/
/*
 * Encode a Ruby Array (optionally nested) as PostgreSQL array in binary format.
 *
 * The function follows the two pass encoder protocol visible below:
 *   - out == NULL: validate the array, run the first pass of the element
 *     encoder on every non-nil item, store the per-item results in
 *     *intermediate and return the number of bytes required.
 *   - out != NULL: write header and elements to out, using the values stored
 *     in *intermediate by the first pass, and return the number of bytes
 *     written.
 *
 * conv         - the array coder; its element coder encodes every non-nil item
 * value        - the object to encode
 * out          - output buffer or NULL for the first pass
 * intermediate - receives/provides a Ruby Array with two entries per non-nil
 *                item: a flag (true = already encoded String, false = needs
 *                the second pass) and the element encoder's intermediate
 * enc_idx      - handed through unchanged to the element encoder
 *
 * The shape is derived from the first element of each nesting level; the
 * first pass then verifies that all sub-arrays agree with it.
 * nil items are written as length -1 without data.
 * Values that are not an Array are delegated to pg_coder_enc_to_s().
 *
 * Raises ArgumentError when the array is nested deeper than MAXDIM, has too
 * many elements, is ragged or mixes Arrays and non-Arrays within a dimension.
 */
static int
pg_bin_enc_array(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate, int enc_idx)
{
	if (TYPE(value) == T_ARRAY) {
		t_pg_composite_coder *this = (t_pg_composite_coder *)conv;
		t_pg_coder_enc_func enc_func = pg_coder_enc_func(this->elem);
		int dim_sizes[MAXDIM];
		int ndim = 1;
		int nitems = 1;
		VALUE el1 = value;

		if (RARRAY_LEN(value) == 0) {
			nitems = 0;
			ndim = 0;
			dim_sizes[0] = 0;
		} else {
			/* Determine number of dimensions, sizes of dimensions and number of items */
			while(1) {
				VALUE el2;
				long long prod;

				dim_sizes[ndim-1] = RARRAY_LENINT(el1);

				/* Multiply in a wider type, so that a product which doesn't fit
				 * into an int is detected instead of overflowing. */
				prod = (long long) nitems * (long long) dim_sizes[ndim-1];
				nitems = (int) prod;
				if ((long long) nitems != prod) {
					rb_raise( rb_eArgError, "unsupported array size: too many elements in %d dimensions", ndim );
				}

				el2 = rb_ary_entry(el1, 0);
				if (TYPE(el2) == T_ARRAY) {
					ndim++;
					if (ndim > MAXDIM)
						rb_raise( rb_eArgError, "unsupported number of array dimensions: >%d", MAXDIM );
				} else {
					break;
				}
				el1 = el2;
			}
		}

		if(out){
			/* Second encoder pass -> write data to `out` */
			int dimpos[MAXDIM];
			VALUE arrays[MAXDIM];
			int dim = 0;
			int item_idx = 0;
			int i;
			char *orig_out = out;
			Oid elem_oid = this->elem ? this->elem->oid : 0;

			/* array header */
			write_nbo32(ndim, out); out += 4;
			write_nbo32(1 /* flags */, out); out += 4;
			write_nbo32(elem_oid, out); out += 4;
			for (i = 0; i < ndim; i++) {
				dimpos[i] = 0;
				write_nbo32(dim_sizes[i], out); out += 4;
				write_nbo32(1 /* offset */, out); out += 4;
			}
			arrays[0] = value;

			while(1) {
				/* traverse tree down */
				while (dim < ndim - 1) {
					arrays[dim + 1] = rb_ary_entry(arrays[dim], dimpos[dim]);
					dim++;
				}

				/* write all items of the innermost array */
				for (i = 0; i < dim_sizes[dim]; i++) {
					VALUE item = rb_ary_entry(arrays[dim], i);

					if (NIL_P(item)) {
						write_nbo32(-1, out); out += 4;
					} else {
						/* Fetch the flag/intermediate pair stored by the first pass */
						int enc_len;
						VALUE is_one_pass = rb_ary_entry(*intermediate, item_idx++);
						VALUE subint = rb_ary_entry(*intermediate, item_idx++);

						if (is_one_pass == Qtrue) {
							/* Encoded string is returned in subint */
							enc_len = RSTRING_LENINT(subint);
							memcpy( out + 4, RSTRING_PTR(subint), enc_len);
						} else {
							enc_len = enc_func(this->elem, item, out + 4, &subint, enc_idx);
						}
						/* the length field precedes the data written above */
						write_nbo32(enc_len, out);
						out += 4 /* length */ + enc_len;
					}
				}

				/* traverse tree up and go to next sibling array */
				do {
					if (dim > 0) {
						dimpos[dim] = 0;
						dim--;
						dimpos[dim]++;
					} else {
						goto finished2;
					}
				} while (dimpos[dim] >= dim_sizes[dim]);
			}
			finished2:
			return (int)(out - orig_out);

		} else {
			/* First encoder pass -> determine required buffer space for `out` */

			int dimpos[MAXDIM];
			VALUE arrays[MAXDIM];
			int dim = 0;
			int item_idx = 0;
			int i;
			int size_sum = 0;

			*intermediate = rb_ary_new2(nitems);

			for (i = 0; i < MAXDIM; i++) {
				dimpos[i] = 0;
			}
			arrays[0] = value;

			while(1) {

				/* traverse tree down and verify that the shape is rectangular */
				while (dim < ndim - 1) {
					VALUE array = rb_ary_entry(arrays[dim], dimpos[dim]);
					if (TYPE(array) != T_ARRAY) {
						rb_raise( rb_eArgError, "expected Array instead of %+"PRIsVALUE" in dimension %d", array, dim + 1 );
					}
					if (dim_sizes[dim + 1] != RARRAY_LEN(array)) {
						rb_raise( rb_eArgError, "varying number of array elements (%d and %d) in dimension %d", dim_sizes[dim + 1], RARRAY_LENINT(array), dim + 1 );
					}
					arrays[dim + 1] = array;
					dim++;
				}

				for (i = 0; i < dim_sizes[dim]; i++) {
					VALUE item = rb_ary_entry(arrays[dim], i);

					if (NIL_P(item)) {
						size_sum += 4 /* length bytes = -1 */;
					} else {
						/* Must be initialized: it is stored into a Ruby Array below,
						 * even if the element encoder doesn't set an intermediate. */
						VALUE subint = Qnil;
						int enc_len = enc_func(this->elem, item, NULL, &subint, enc_idx);

						/* Gather all intermediate values of elements into an array, which is returned as intermediate for the array encoder */
						if( enc_len == -1 ){
							/* Encoded string is returned in subint */
							rb_ary_store(*intermediate, item_idx++, Qtrue);
							rb_ary_store(*intermediate, item_idx++, subint);

							enc_len = RSTRING_LENINT(subint);
						} else {
							/* Two passes necessary */
							rb_ary_store(*intermediate, item_idx++, Qfalse);
							rb_ary_store(*intermediate, item_idx++, subint);
						}
						size_sum += 4 /* length bytes */ + enc_len;
					}
				}

				/* traverse tree up and go to next sibling array */
				do {
					if (dim > 0) {
						dimpos[dim] = 0;
						dim--;
						dimpos[dim]++;
					} else {
						goto finished1;
					}
				} while (dimpos[dim] >= dim_sizes[dim]);
			}
			finished1:;

			return 4 /* ndim */ + 4 /* flags */ + 4 /* oid */ +
					ndim * (4 /* dim size */ + 4 /* dim offset */) +
					size_sum;
		}
	} else {
		return pg_coder_enc_to_s( conv, value, out, intermediate, enc_idx );
	}
}

/*
* Document-class: PG::BinaryEncoder::FromBase64 < PG::CompositeEncoder
*
Expand Down Expand Up @@ -381,6 +576,8 @@ init_pg_binary_encoder(void)
/* dummy = rb_define_class_under( rb_mPG_BinaryEncoder, "Date", rb_cPG_SimpleEncoder ); */
pg_define_coder( "Date", pg_bin_enc_date, rb_cPG_SimpleEncoder, rb_mPG_BinaryEncoder );

/* dummy = rb_define_class_under( rb_mPG_BinaryEncoder, "Array", rb_cPG_CompositeEncoder ); */
pg_define_coder( "Array", pg_bin_enc_array, rb_cPG_CompositeEncoder, rb_mPG_BinaryEncoder );
/* dummy = rb_define_class_under( rb_mPG_BinaryEncoder, "FromBase64", rb_cPG_CompositeEncoder ); */
pg_define_coder( "FromBase64", pg_bin_enc_from_base64, rb_cPG_CompositeEncoder, rb_mPG_BinaryEncoder );
}
4 changes: 2 additions & 2 deletions lib/pg/basic_type_registry.rb
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,8 @@ def initialize(connection, registry: nil)
@maps = [
[0, :encoder, PG::TextEncoder::Array],
[0, :decoder, PG::TextDecoder::Array],
[1, :encoder, nil],
[1, :decoder, nil],
[1, :encoder, PG::BinaryEncoder::Array],
[1, :decoder, PG::BinaryDecoder::Array],
].inject([]) do |h, (format, direction, arraycoder)|
coders = registry.coders_for(format, direction) || {}
h[format] ||= {}
Expand Down
Loading