Skip to content
Browse files

More efficient handling of conversion procs for large datasets

Switch to filling the conversion proc array before iterating over
results, instead of checking inside every column value to see if
a conversion proc is available.

For speed reasons, don't check for conversion procs for char,
varchar, and text columns, since those should be returned as
strings.
  • Loading branch information...
1 parent 884e6b9 commit 8b277ebac024ded5b4b8f9b17916507591480599 @jeremyevans committed Mar 16, 2011
Showing with 35 additions and 4 deletions.
  1. +35 −4 ext/sequel_pg/sequel_pg.c
View
39 ext/sequel_pg/sequel_pg.c
@@ -257,7 +257,33 @@ static VALUE spg_yield_hash_rows(VALUE self, VALUE rres, VALUE ignore) {
for(j=0; j<nfields; j++) {
colsyms[j] = rb_funcall(self, spg_id_output_identifier, 1, rb_str_new2(PQfname(res, j)));
- colconvert[j] = Qfalse;
+ i = PQftype(res, j);
+ switch (i) {
+ case 16:
+ case 17:
+ case 20:
+ case 21:
+ case 22:
+ case 23:
+ case 26:
+ case 700:
+ case 701:
+ case 790:
+ case 1700:
+ case 1082:
+ case 1083:
+ case 1266:
+ case 1114:
+ case 1184:
+ case 18:
+ case 25:
+ case 1043:
+ colconvert[j] = Qnil;
+ break;
+ default:
+ colconvert[j] = rb_funcall(spg_PG_TYPES, spg_id_get, 1, INT2NUM(i));
+ break;
+ }
}
rb_ivar_set(self, spg_id_columns, rb_ary_new4(nfields, colsyms));
@@ -304,14 +330,19 @@ static VALUE spg_yield_hash_rows(VALUE self, VALUE rres, VALUE ignore) {
case 1184:
rv = spg_timestamp(v);
break;
+ case 18: /* char */
+ case 25: /* text */
+ case 1043: /* varchar*/
+ rv = rb_tainted_str_new(v, PQgetlength(res, i, j));
+#ifdef SPG_ENCODING
+ rb_enc_associate_index(rv, enc_index);
+#endif
+ break;
default:
rv = rb_tainted_str_new(v, PQgetlength(res, i, j));
#ifdef SPG_ENCODING
rb_enc_associate_index(rv, enc_index);
#endif
- if (colconvert[j] == Qfalse) {
- colconvert[j] = rb_funcall(spg_PG_TYPES, spg_id_get, 1, INT2NUM(PQftype(res, j)));
- }
if (colconvert[j] != Qnil) {
rv = rb_funcall(colconvert[j], spg_id_call, 1, rv);
}

2 comments on commit 8b277eb

@funny-falcon

Checking for text is a good catch :)
But, is it really faster to get rid of if (colconvert[j] == Qfalse) { ... }?
One index check is not that big performance hit, but allows to remain maintainable.

@jeremyevans
Owner

But you are doing the index check in the inner loop. It's O(m*n) where m is the number of columns and n is the number of rows. My implementation is O(m), since it only does one check per column. Not that I think it'll make a big performance difference, but in the inner loop every little bit helps.

Please sign in to comment.
Something went wrong with that request. Please try again.