Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 1263 lines (1095 sloc) 33.335 kb
0fc1320 Importing prefix operator and opclass files
dim authored
1 /**
2 * Prefix opclass allows to efficiently index a prefix table with
3 * GiST.
4 *
5 * More common use case is telephony prefix searching for cost or
6 * routing.
7 *
8 * Many thanks to AndrewSN, who provided great amount of help in the
9 * writing of this opclass, on the PostgreSQL internals, GiST inner
10 * working and prefix search analyses.
11 *
e2307bd prefix_range @> prefix(text) operator and its commutator
dim authored
12 * $Id: prefix.c,v 1.17 2008/03/13 10:00:12 dim Exp $
0fc1320 Importing prefix operator and opclass files
dim authored
13 */
14
15 #include <stdio.h>
16 #include "postgres.h"
17
18 #include "access/gist.h"
19 #include "access/skey.h"
20 #include "utils/elog.h"
21 #include "utils/palloc.h"
22 #include "utils/builtins.h"
23 #include <math.h>
24
25 #define DEBUG
cf4a835 Begin main presort() loop at OffsetNumberNext(FirstOffsetNumber); and…
dim authored
26 /**
27 * We use those DEBUG defines in the code, uncomment them to get very
28 * verbose output.
29 *
30 #define DEBUG_UNION
31 #define DEBUG_PENALTY
32 #define DEBUG_PRESORT_GP
33 #define DEBUG_PRESORT_MAX
34 #define DEBUG_PRESORT_UNIONS
35 #define DEBUG_PRESORT_RESULT
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
36
37 #define DEBUG_PR_IN
cf4a835 Begin main presort() loop at OffsetNumberNext(FirstOffsetNumber); and…
dim authored
38 */
0fc1320 Importing prefix operator and opclass files
dim authored
39
40 PG_MODULE_MAGIC;
41
42 /**
7d2ac3d firsts attempts towards 8.3 compliance, using VARSIZE_ANY_EXHDR()
dim authored
43 * This code has only been tested with PostgreSQL 8.2 and 8.3
44 */
9ececbd Now using PG_VERSION_NUM
dim authored
45 #if PG_VERSION_NUM / 100 != 802 && PG_VERSION_NUM / 100 != 803
7d2ac3d firsts attempts towards 8.3 compliance, using VARSIZE_ANY_EXHDR()
dim authored
46 #error "Unknown or unsupported postgresql version"
47 #endif
48
49 /**
50 * Define our own varlena size macro depending on PGVER
51 */
9ececbd Now using PG_VERSION_NUM
dim authored
52 #if PG_VERSION_NUM / 100 == 802
fef1590 8.3 compat now ok, using VARDATA_ANY() and PG_GETARG_TEXT_PP()
dim authored
53 #define PREFIX_VARSIZE(x) (VARSIZE(x) - VARHDRSZ)
54 #define PREFIX_VARDATA(x) (VARDATA(x))
55 #define PREFIX_PG_GETARG_TEXT(x) (PG_GETARG_TEXT_P(x))
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
56 #define PREFIX_SET_VARSIZE(p, s) (VARATT_SIZEP(p) = s + VARHDRSZ)
57
7d2ac3d firsts attempts towards 8.3 compliance, using VARSIZE_ANY_EXHDR()
dim authored
58 #else
fef1590 8.3 compat now ok, using VARDATA_ANY() and PG_GETARG_TEXT_PP()
dim authored
59 #define PREFIX_VARSIZE(x) (VARSIZE_ANY_EXHDR(x))
60 #define PREFIX_VARDATA(x) (VARDATA_ANY(x))
61 #define PREFIX_PG_GETARG_TEXT(x) (PG_GETARG_TEXT_PP(x))
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
62 #define PREFIX_SET_VARSIZE(p, s) (SET_VARSIZE(p, s))
63 #endif
64
/**
 * prefix_range datatype.
 *
 * The struct is the header of a variable-length value: the
 * NUL-terminated prefix string is stored from prefix[0] onwards and the
 * allocation is made larger than sizeof(prefix_range) to hold it.
 */
typedef struct {
  char first;      /* lower bound of the range, 0 when there is no range */
  int  last;       /* upper bound of the range, 0 when there is no range */
  char prefix[1];  /* this is a varlena structure, data follows */
} prefix_range;
73
/**
 * Characters delimiting the optional range part of the textual
 * representation, as in "123[4-6]".
 */
enum pr_delimiters_t {
  PR_OPEN  = '[',  /* opens the range */
  PR_CLOSE = ']',  /* closes the range */
  PR_SEP   = '-'   /* separates the first bound from the last one */
} pr_delimiters;
79
80 /**
e2307bd prefix_range @> prefix(text) operator and its commutator
dim authored
81 * prefix_range input/output functions and operators
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
82 */
83 Datum prefix_range_in(PG_FUNCTION_ARGS);
84 Datum prefix_range_out(PG_FUNCTION_ARGS);
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
85 Datum prefix_range_cast_to_text(PG_FUNCTION_ARGS);
86 Datum prefix_range_cast_from_text(PG_FUNCTION_ARGS);
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
87 Datum prefix_range_eq(PG_FUNCTION_ARGS);
88 Datum prefix_range_neq(PG_FUNCTION_ARGS);
89 Datum prefix_range_overlaps(PG_FUNCTION_ARGS);
90 Datum prefix_range_contains(PG_FUNCTION_ARGS);
91 Datum prefix_range_contains_strict(PG_FUNCTION_ARGS);
92 Datum prefix_range_contained_by(PG_FUNCTION_ARGS);
93 Datum prefix_range_contained_by_strict(PG_FUNCTION_ARGS);
e2307bd prefix_range @> prefix(text) operator and its commutator
dim authored
94 Datum prefix_range_contains_prefix(PG_FUNCTION_ARGS);
95 Datum prefix_range_contained_by_prefix(PG_FUNCTION_ARGS);
2425c39 Implementation of some more operators for prefix_range: =, <>, <<, <<…
dim authored
96 /*
97 Datum prefix_range_recv(PG_FUNCTION_ARGS);
98 Datum prefix_range_send(PG_FUNCTION_ARGS);
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
99 Datum prefix_range_union(PG_FUNCTION_ARGS);
100 Datum prefix_range_inter(PG_FUNCTION_ARGS);
101 */
102
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
103 #define DatumGetPrefixRange(X) ((prefix_range *) PREFIX_VARDATA(DatumGetPointer(X)) )
104 #define PrefixRangeGetDatum(X) PointerGetDatum(make_varlena(X))
105 #define PG_GETARG_PREFIX_RANGE_P(n) DatumGetPrefixRange(PG_DETOAST_DATUM(PG_GETARG_DATUM(n)))
106 #define PG_RETURN_PREFIX_RANGE_P(x) return PrefixRangeGetDatum(x)
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
107
/**
 * Used by prefix_contains_internal and pr_contains_prefix.
 *
 * Tells whether the first plen bytes of q are equal to p, that is
 * whether p is a prefix of q.
 *
 * plen is the length of p and qlen the length of q. Callers deal with
 * either text * or char * data, and it is their responsibility to
 * compute the lengths with PREFIX_VARSIZE() or strlen() accordingly.
 */
static inline
bool __prefix_contains(char *p, char *q, int plen, int qlen) {
  /* a candidate shorter than the prefix can never match */
  return qlen >= plen && memcmp(p, q, plen) == 0;
}
122
123 /**
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
124 * First, the input reader. A prefix range will have to respect the
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
125 * following regular expression: .*([[].-.[]])?
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
126 *
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
127 * examples : 123[4-6], [1-3], 234, 01[] --- last one not covered by
128 * regexp.
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
129 */
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
130
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
131 static inline
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
132 prefix_range *build_pr(char *prefix) {
133 int s = strlen(prefix) + 1;
134 prefix_range *pr = palloc(sizeof(prefix_range) + s);
135 memcpy(pr->prefix, prefix, s);
136 pr->first = 0;
137 pr->last = 0;
138
139 #ifdef DEBUG_PR_IN
140 elog(NOTICE,
141 "build_pr: pr->prefix = '%s', pr->first = %d, pr->last = %d",
142 pr->prefix, pr->first, pr->last);
143 #endif
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
144
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
145 return pr;
146 }
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
147
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
148 static inline
149 prefix_range *pr_from_str(char *str) {
150 prefix_range *pr = NULL;
151 char *prefix = (char *)palloc(strlen(str)+1);
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
152 char current = 0, previous = 0;
153 bool opened = false;
154 bool closed = false;
155 bool sawsep = false;
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
156 char *ptr, *prefix_ptr = prefix;
157 char tmpswap;
158
159 bzero(prefix, strlen(str)+1);
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
160
161 for(ptr=str; *ptr != 0; ptr++) {
162 previous = current;
163 current = *ptr;
164
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
165 if( !opened && current != PR_OPEN )
166 *prefix_ptr++ = current;
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
167
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
168 #ifdef DEBUG_PR_IN
169 elog(NOTICE, "prefix_range previous='%c' current='%c' prefix='%s'",
170 (previous?previous:' '), current, prefix);
171 #endif
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
172
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
173 switch( current ) {
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
174
175 case PR_OPEN:
176 if( opened ) {
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
177 #ifdef DEBUG_PR_IN
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
178 elog(ERROR,
179 "prefix_range %s contains several %c", str, PR_OPEN);
180 #endif
181 return NULL;
182 }
183 opened = true;
184
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
185 pr = build_pr(prefix);
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
186 break;
187
188 case PR_SEP:
189 if( opened ) {
190 if( closed ) {
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
191 #ifdef DEBUG_PR_IN
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
192 elog(ERROR,
193 "prefix_range %s contains trailing character", str);
7d2ac3d firsts attempts towards 8.3 compliance, using VARSIZE_ANY_EXHDR()
dim authored
194 #endif
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
195 return NULL;
196 }
197 sawsep = true;
198
199 if( previous == PR_OPEN ) {
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
200 #ifdef DEBUG_PR_IN
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
201 elog(ERROR,
202 "prefix_range %s has separator following range opening, without data", str);
203 #endif
204 return NULL;
205 }
206
207 pr->first = previous;
208 }
209 break;
210
211 case PR_CLOSE:
212 if( !opened ) {
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
213 #ifdef DEBUG_PR_IN
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
214 elog(ERROR,
215 "prefix_range %s closes a range which is not opened ", str);
216 #endif
217 return NULL;
218 }
219
220 if( closed ) {
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
221 #ifdef DEBUG_PR_IN
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
222 elog(ERROR,
223 "prefix_range %s contains several %c", str, PR_CLOSE);
224 #endif
225 return NULL;
226 }
227 closed = true;
228
229 if( sawsep ) {
230 if( previous == PR_SEP ) {
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
231 #ifdef DEBUG_PR_IN
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
232 elog(ERROR,
233 "prefix_range %s has a closed range without last bound", str);
234 #endif
235 return NULL;
236 }
237 pr->last = previous;
238 }
239 else {
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
240 if( previous != PR_OPEN ) {
241 #ifdef DEBUG_PR_IN
242 elog(ERROR,
243 "prefix_range %s has a closing range without separator", str);
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
244 #endif
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
245 return NULL;
246 }
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
247 }
248 break;
249
250 default:
251 if( closed ) {
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
252 #ifdef DEBUG_PR_IN
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
253 elog(ERROR,
254 "prefix_range %s contains trailing characters", str);
255 #endif
256 return NULL;
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
257 }
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
258 break;
259 }
260 }
261
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
262 if( ! opened ) {
263 pr = build_pr(prefix);
264 }
265
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
266 if( opened && !closed ) {
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
267 #ifdef DEBUG_PR_IN
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
268 elog(ERROR, "prefix_range %s opens a range but does not close it", str);
269 #endif
270 return NULL;
271 }
272
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
273 /**
274 * Ensure first < last
275 */
276 if( pr->first == pr->last ) {
277 int s = strlen(pr->prefix)+2;
278 prefix = (char *)palloc(s);
279 memcpy(prefix, pr->prefix, s-2);
280 prefix[s-2] = pr->first;
281 prefix[s-1] = 0;
282
283 #ifdef DEBUG_PR_IN
284 elog(NOTICE, "prefix_range %s %s %s", str, pr->prefix, prefix);
285 #endif
286
287 pfree(pr);
288 pr = build_pr(prefix);
289 }
290 else if( pr->first > pr->last ) {
291 tmpswap = pr->first;
292 pr->first = pr->last;
293 pr->last = tmpswap;
294 }
295
296 #ifdef DEBUG_PR_IN
297 if( pr != NULL ) {
298 if( pr->first && pr->last )
299 elog(NOTICE,
300 "prefix_range %s: prefix = '%s', first = '%c', last = '%c'",
301 str, pr->prefix, pr->first, pr->last);
302 else
303 elog(NOTICE,
304 "prefix_range %s: prefix = '%s', no first nor last",
305 str, pr->prefix);
306 }
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
307 #endif
308
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
309 return pr;
310 }
311
312 static inline
313 struct varlena *make_varlena(prefix_range *pr) {
314 struct varlena *vdat;
315 int size;
316
317 if (pr != NULL) {
318 size = sizeof(prefix_range) + sizeof(pr->prefix) + VARHDRSZ;
319 vdat = palloc(size);
320 PREFIX_SET_VARSIZE(vdat, size);
321 memcpy(VARDATA(vdat), pr, size - VARHDRSZ);
322
323 return vdat;
324 }
325 return NULL;
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
326 }
327
2425c39 Implementation of some more operators for prefix_range: =, <>, <<, <<…
dim authored
328 static inline
329 bool pr_eq(prefix_range *a, prefix_range *b) {
330 int sa = strlen(a->prefix);
331 int sb = strlen(b->prefix);
332
333 return sa == sb
334 && memcmp(a->prefix, b->prefix, sa) == 0
335 && a->first == b->first
336 && a->last == b->last;
337 }
338
339 /**
340 * TODO
341 *
342 * right prefix range overlaps left one when any text prefixed by left
343 * is known to be prefixed by any right prefix.
344 */
345 static inline
346 bool pr_overlaps(prefix_range *left, prefix_range *right) {
347 return false;
348 }
349
350 static inline
351 bool pr_contains(prefix_range *left, prefix_range *right, bool eqval) {
352 int sl;
353 int sr;
354 bool left_prefixes_right;
355
356 if( pr_eq(left, right) )
357 return eqval;
358
359 sl = strlen(left->prefix);
360 sr = strlen(right->prefix);
361
362 if( sr < sl )
363 return false;
364
365 left_prefixes_right = memcmp(left->prefix, right->prefix, sl) == 0;
366
367 if( left_prefixes_right ) {
368 if( sl == sr )
369 return left->first == 0 ||
370 (left->first <= right->first && left->last >= right->last);
371
372 return left->first == 0 ||
373 (left->first <= right->prefix[sl] && right->prefix[sl] <= left->last);
374 }
375 return false;
376 }
377
378 /**
379 * TODO
380 */
381 static inline
382 prefix_range *pr_union(prefix_range *a, prefix_range *b) {
383 prefix_range *res = NULL;
384
385 return res;
386 }
387
e2307bd prefix_range @> prefix(text) operator and its commutator
dim authored
388 /**
389 * does a given prefix_range includes a given prefix?
390 */
391 static inline
392 bool pr_contains_prefix(prefix_range *pr, text *query, bool eqval) {
393 int plen = strlen(pr->prefix);
394 int qlen = PREFIX_VARSIZE(query);
395 char *p = pr->prefix;
396 char *q = (char *)PREFIX_VARDATA(query);
397
398 if( __prefix_contains(p, q, plen, qlen) ) {
399 if( pr->first == 0 || qlen == plen ) {
400 return eqval;
401 }
402
403 /**
404 * __prefix_contains() is true means qlen >= plen, and previous
405 * test ensures qlen != plen, we hence assume qlen > plen.
406 */
407 Assert(qlen > plen);
408 return pr-> first <= q[plen] && q[plen] <= pr->last;
409 }
410 return false;
411 }
412
413
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
414 PG_FUNCTION_INFO_V1(prefix_range_in);
415 Datum
416 prefix_range_in(PG_FUNCTION_ARGS)
417 {
418 char *str = PG_GETARG_CSTRING(0);
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
419 prefix_range *pr = pr_from_str(str);
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
420
421 if (pr != NULL) {
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
422 PG_RETURN_PREFIX_RANGE_P(pr);
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
423 }
424
425 ereport(ERROR,
426 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
427 errmsg("invalid prefix_range value: \"%s\"", str)));
428 PG_RETURN_NULL();
429 }
430
431
432 PG_FUNCTION_INFO_V1(prefix_range_out);
433 Datum
434 prefix_range_out(PG_FUNCTION_ARGS)
435 {
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
436 prefix_range *pr = PG_GETARG_PREFIX_RANGE_P(0);
437 char *out = NULL;
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
438
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
439 if( pr->first ) {
440 out = (char *)palloc((strlen(pr->prefix)+6) * sizeof(char));
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
441 sprintf(out, "%s[%c-%c]", pr->prefix, pr->first, pr->last);
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
442 }
443 else {
444 out = (char *)palloc((strlen(pr->prefix)+3) * sizeof(char));
445 sprintf(out, "%s[]", pr->prefix);
446 }
447 PG_RETURN_CSTRING(out);
448 }
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
449
a4da0ee prefix_range datatype now has working in/out functions and text casts
dim authored
450 PG_FUNCTION_INFO_V1(prefix_range_cast_from_text);
451 Datum
452 prefix_range_cast_from_text(PG_FUNCTION_ARGS)
453 {
454 text *txt = PG_GETARG_TEXT_P(0);
455 Datum cstring = DirectFunctionCall1(textout, PointerGetDatum(txt));
456 return DirectFunctionCall1(prefix_range_in, cstring);
457 }
458
459 PG_FUNCTION_INFO_V1(prefix_range_cast_to_text);
460 Datum
461 prefix_range_cast_to_text(PG_FUNCTION_ARGS)
462 {
463 prefix_range *pr = PG_GETARG_PREFIX_RANGE_P(0);
464 Datum cstring;
465 text *out;
466
467 if (pr != NULL) {
468 cstring = DirectFunctionCall1(prefix_range_out, PrefixRangeGetDatum(pr));
469 out = (text *)DirectFunctionCall1(textin, cstring);
470
471 PG_RETURN_TEXT_P(out);
472 }
473 PG_RETURN_NULL();
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
474 }
7d2ac3d firsts attempts towards 8.3 compliance, using VARSIZE_ANY_EXHDR()
dim authored
475
2425c39 Implementation of some more operators for prefix_range: =, <>, <<, <<…
dim authored
476 PG_FUNCTION_INFO_V1(prefix_range_eq);
477 Datum
478 prefix_range_eq(PG_FUNCTION_ARGS)
479 {
480 PG_RETURN_BOOL( pr_eq(PG_GETARG_PREFIX_RANGE_P(0),
481 PG_GETARG_PREFIX_RANGE_P(1)) );
482 }
483
484 PG_FUNCTION_INFO_V1(prefix_range_neq);
485 Datum
486 prefix_range_neq(PG_FUNCTION_ARGS)
487 {
488 PG_RETURN_BOOL( ! pr_eq(PG_GETARG_PREFIX_RANGE_P(0),
489 PG_GETARG_PREFIX_RANGE_P(1)) );
490 }
491
492 PG_FUNCTION_INFO_V1(prefix_range_overlaps);
493 Datum
494 prefix_range_overlaps(PG_FUNCTION_ARGS)
495 {
496 ereport(ERROR,
497 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
498 errmsg("prefix_range overlaps is not yep implemented.")));
499
500 PG_RETURN_BOOL( pr_overlaps(PG_GETARG_PREFIX_RANGE_P(0),
501 PG_GETARG_PREFIX_RANGE_P(1)) );
502 }
503
504 PG_FUNCTION_INFO_V1(prefix_range_contains);
505 Datum
506 prefix_range_contains(PG_FUNCTION_ARGS)
507 {
508 PG_RETURN_BOOL( pr_contains(PG_GETARG_PREFIX_RANGE_P(0),
509 PG_GETARG_PREFIX_RANGE_P(1),
510 TRUE ));
511 }
512
513 PG_FUNCTION_INFO_V1(prefix_range_contains_strict);
514 Datum
515 prefix_range_contains_strict(PG_FUNCTION_ARGS)
516 {
517 PG_RETURN_BOOL( pr_contains(PG_GETARG_PREFIX_RANGE_P(0),
518 PG_GETARG_PREFIX_RANGE_P(1),
519 FALSE ));
520 }
521
522 PG_FUNCTION_INFO_V1(prefix_range_contained_by);
523 Datum
524 prefix_range_contained_by(PG_FUNCTION_ARGS)
525 {
526 PG_RETURN_BOOL( pr_contains(PG_GETARG_PREFIX_RANGE_P(1),
527 PG_GETARG_PREFIX_RANGE_P(0),
528 TRUE ));
529 }
530
531 PG_FUNCTION_INFO_V1(prefix_range_contained_by_strict);
532 Datum
533 prefix_range_contained_by_strict(PG_FUNCTION_ARGS)
534 {
535 PG_RETURN_BOOL( pr_contains(PG_GETARG_PREFIX_RANGE_P(1),
536 PG_GETARG_PREFIX_RANGE_P(0),
537 FALSE ));
538 }
539
e2307bd prefix_range @> prefix(text) operator and its commutator
dim authored
540 PG_FUNCTION_INFO_V1(prefix_range_contains_prefix);
541 Datum
542 prefix_range_contains_prefix(PG_FUNCTION_ARGS)
543 {
544 PG_RETURN_BOOL( pr_contains_prefix(PG_GETARG_PREFIX_RANGE_P(0),
545 PG_GETARG_TEXT_P(1),
546 TRUE ));
547 }
548
549 PG_FUNCTION_INFO_V1(prefix_range_contained_by_prefix);
550 Datum
551 prefix_range_contained_by_prefix(PG_FUNCTION_ARGS)
552 {
553 PG_RETURN_BOOL( pr_contains_prefix(PG_GETARG_PREFIX_RANGE_P(1),
554 PG_GETARG_TEXT_P(0),
555 TRUE ));
556 }
557
7d2ac3d firsts attempts towards 8.3 compliance, using VARSIZE_ANY_EXHDR()
dim authored
558 /**
f61609e First picksplit() implementation of AndrewSN ideas, missing the preli…
dim authored
559 * - Operator prefix @> query and query <@ prefix
560 * - greater_prefix, exposed as a func and an aggregate
561 * - prefix_penalty, exposed for testing purpose
0fc1320 Importing prefix operator and opclass files
dim authored
562 */
563 Datum prefix_contains(PG_FUNCTION_ARGS);
564 Datum prefix_contained_by(PG_FUNCTION_ARGS);
565 Datum greater_prefix(PG_FUNCTION_ARGS);
f61609e First picksplit() implementation of AndrewSN ideas, missing the preli…
dim authored
566 Datum prefix_penalty(PG_FUNCTION_ARGS);
0fc1320 Importing prefix operator and opclass files
dim authored
567
568 /**
569 * GiST support methods
570 */
571 Datum gprefix_consistent(PG_FUNCTION_ARGS);
572 Datum gprefix_compress(PG_FUNCTION_ARGS);
573 Datum gprefix_decompress(PG_FUNCTION_ARGS);
574 Datum gprefix_penalty(PG_FUNCTION_ARGS);
575 Datum gprefix_picksplit(PG_FUNCTION_ARGS);
576 Datum gprefix_union(PG_FUNCTION_ARGS);
577 Datum gprefix_same(PG_FUNCTION_ARGS);
578
579 /**
580 * prefix opclass only provides 1 operator, @>
581 */
582 static inline
583 bool prefix_contains_internal(text *prefix, text *query, bool eqval)
584 {
e2307bd prefix_range @> prefix(text) operator and its commutator
dim authored
585 int plen = PREFIX_VARSIZE(prefix);
586 int qlen = PREFIX_VARSIZE(query);
587 char *p = PREFIX_VARDATA(prefix);
588 char *q = PREFIX_VARDATA(query);
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
589
e2307bd prefix_range @> prefix(text) operator and its commutator
dim authored
590 if( __prefix_contains(p, q, plen, qlen) )
0fc1320 Importing prefix operator and opclass files
dim authored
591 return eqval;
592
e2307bd prefix_range @> prefix(text) operator and its commutator
dim authored
593 return false;
0fc1320 Importing prefix operator and opclass files
dim authored
594 }
595
596 /**
597 * The operator @> code
598 */
599 PG_FUNCTION_INFO_V1(prefix_contains);
600 Datum
601 prefix_contains(PG_FUNCTION_ARGS)
602 {
fef1590 8.3 compat now ok, using VARDATA_ANY() and PG_GETARG_TEXT_PP()
dim authored
603 PG_RETURN_BOOL( prefix_contains_internal(PREFIX_PG_GETARG_TEXT(0),
604 PREFIX_PG_GETARG_TEXT(1),
605 true) );
0fc1320 Importing prefix operator and opclass files
dim authored
606 }
607
608 /**
609 * The commutator, <@, using the same internal code
610 */
611 PG_FUNCTION_INFO_V1(prefix_contained_by);
612 Datum
613 prefix_contained_by(PG_FUNCTION_ARGS)
614 {
fef1590 8.3 compat now ok, using VARDATA_ANY() and PG_GETARG_TEXT_PP()
dim authored
615 PG_RETURN_BOOL( prefix_contains_internal(PREFIX_PG_GETARG_TEXT(1),
616 PREFIX_PG_GETARG_TEXT(0),
7a4fd5a First steps at implementing a prefix_range datatype
dim authored
617 true) );
0fc1320 Importing prefix operator and opclass files
dim authored
618 }
619
620 /**
621 * greater_prefix returns the greater prefix of any 2 given texts
622 */
623 static inline
624 text *greater_prefix_internal(text *a, text *b)
625 {
626 int i = 0;
7d2ac3d firsts attempts towards 8.3 compliance, using VARSIZE_ANY_EXHDR()
dim authored
627 int la = PREFIX_VARSIZE(a);
628 int lb = PREFIX_VARSIZE(b);
fef1590 8.3 compat now ok, using VARDATA_ANY() and PG_GETARG_TEXT_PP()
dim authored
629 char *ca = PREFIX_VARDATA(a);
630 char *cb = PREFIX_VARDATA(b);
0fc1320 Importing prefix operator and opclass files
dim authored
631
632 for(i=0; i<la && i<lb && ca[i] == cb[i]; i++);
633
634 /* i is the last common char position in a, or 0 */
635 if( i == 0 )
636 return DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum("")));
637 else
638 return DatumGetTextPSlice(PointerGetDatum(a), 0, i);
639 }
640
641 PG_FUNCTION_INFO_V1(greater_prefix);
642 Datum
643 greater_prefix(PG_FUNCTION_ARGS)
644 {
645
fef1590 8.3 compat now ok, using VARDATA_ANY() and PG_GETARG_TEXT_PP()
dim authored
646 PG_RETURN_POINTER( greater_prefix_internal(PREFIX_PG_GETARG_TEXT(0),
647 PREFIX_PG_GETARG_TEXT(1)) );
0fc1320 Importing prefix operator and opclass files
dim authored
648 }
649
f61609e First picksplit() implementation of AndrewSN ideas, missing the preli…
dim authored
650 /**
651 * penalty internal function, which is called in more places than just
652 * gist penalty() function, namely picksplit() uses it too.
653 *
654 * Consider greater common prefix length, the greater the better, then
655 * for a distance of 1 (only last prefix char is different), consider
656 * char code distance.
657 *
658 * With gplen the size of the greatest common prefix and dist the char
659 * code distance, the following maths should do (per AndrewSN):
660 *
661 * penalty() = dist / (256 ^ gplen)
662 *
663 * penalty(01, 03) == 2 / (256^1)
664 * penalty(123, 125) == 2 / (256^2)
665 * penalty(12, 56) == 4 / (256^0)
666 * penalty(0, 17532) == 1 / (256^0)
667 *
668 * 256 is then number of codes any text position (char) can admit.
669 */
670 static inline
671 float prefix_penalty_internal(text *orig, text *new)
672 {
673 float penalty;
674 text *gp;
675 int nlen, olen, gplen, dist = 0;
676
7d2ac3d firsts attempts towards 8.3 compliance, using VARSIZE_ANY_EXHDR()
dim authored
677 olen = PREFIX_VARSIZE(orig);
678 nlen = PREFIX_VARSIZE(new);
f61609e First picksplit() implementation of AndrewSN ideas, missing the preli…
dim authored
679 gp = greater_prefix_internal(orig, new);
7d2ac3d firsts attempts towards 8.3 compliance, using VARSIZE_ANY_EXHDR()
dim authored
680 gplen = PREFIX_VARSIZE(gp);
f61609e First picksplit() implementation of AndrewSN ideas, missing the preli…
dim authored
681
682 /**
683 * greater_prefix length is orig length only if orig == gp
684 */
685 if( gplen == olen )
686 penalty = 0;
687
688 dist = 1;
689 if( nlen == olen ) {
fef1590 8.3 compat now ok, using VARDATA_ANY() and PG_GETARG_TEXT_PP()
dim authored
690 char *o = PREFIX_VARDATA(orig);
691 char *n = PREFIX_VARDATA(new);
f61609e First picksplit() implementation of AndrewSN ideas, missing the preli…
dim authored
692 dist = abs((int)o[olen-1] - (int)n[nlen-1]);
693 }
694 penalty = (((float)dist) / powf(256, gplen));
695
67b9a2f picksplit() first pass is about some kind of sorting. This commit sti…
dim authored
696 #ifdef DEBUG_PENALTY
f61609e First picksplit() implementation of AndrewSN ideas, missing the preli…
dim authored
697 elog(NOTICE, "gprefix_penalty_internal(%s, %s) == %d/(256^%d) == %g",
698 DatumGetCString(DirectFunctionCall1(textout,PointerGetDatum(orig))),
699 DatumGetCString(DirectFunctionCall1(textout,PointerGetDatum(new))),
700 dist, gplen, penalty);
701 #endif
702
703 return penalty;
704 }
705
706 /**
707 * For testing purposes we export our penalty function to SQL
708 */
709 PG_FUNCTION_INFO_V1(prefix_penalty);
710 Datum
711 prefix_penalty(PG_FUNCTION_ARGS)
712 {
fef1590 8.3 compat now ok, using VARDATA_ANY() and PG_GETARG_TEXT_PP()
dim authored
713 float penalty = prefix_penalty_internal(PREFIX_PG_GETARG_TEXT(0),
714 PREFIX_PG_GETARG_TEXT(1));
f61609e First picksplit() implementation of AndrewSN ideas, missing the preli…
dim authored
715
716 PG_RETURN_FLOAT4(penalty);
717 }
0fc1320 Importing prefix operator and opclass files
dim authored
718
719 /**
720 * GiST opclass methods
721 */
722
723 PG_FUNCTION_INFO_V1(gprefix_consistent);
724 Datum
725 gprefix_consistent(PG_FUNCTION_ARGS)
726 {
727 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
728 text *query = (text *) PG_GETARG_POINTER(1);
729 StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
730 text *key = (text *) DatumGetPointer(entry->key);
731 bool retval;
732
733 /**
734 * We only have 1 Strategy (operator @>)
3f678f4 First version working, some more doc, avoiding gcc complaints, better…
dim authored
735 * and we want to avoid compiler complaints that we do not use it.
0fc1320 Importing prefix operator and opclass files
dim authored
736 */
737 Assert(strategy == 1);
3f678f4 First version working, some more doc, avoiding gcc complaints, better…
dim authored
738 (void) strategy;
739 retval = prefix_contains_internal(key, query, true);
0fc1320 Importing prefix operator and opclass files
dim authored
740
741 PG_RETURN_BOOL(retval);
742 }
743
744 /**
745 * Prefix penalty: we want the penalty to be lower for closer
746 * prefixes, taking into account length difference and content
747 * distance.
748 *
749 * For examples we want new prefix 125 to be inserted by preference in
750 * the 124 branch, not in a 128 or a 256 branch.
751 *
752 */
753 PG_FUNCTION_INFO_V1(gprefix_penalty);
754 Datum
755 gprefix_penalty(PG_FUNCTION_ARGS)
756 {
757 GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0);
758 GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
759 float *penalty = (float *) PG_GETARG_POINTER(2);
760
761 text *orig = (text *) DatumGetPointer(origentry->key);
762 text *new = (text *) DatumGetPointer(newentry->key);
763
f61609e First picksplit() implementation of AndrewSN ideas, missing the preli…
dim authored
764 *penalty = prefix_penalty_internal(orig, new);
0fc1320 Importing prefix operator and opclass files
dim authored
765 PG_RETURN_POINTER(penalty);
766 }
767
768 /**
67b9a2f picksplit() first pass is about some kind of sorting. This commit sti…
dim authored
769 * prefix picksplit first pass step: presort the SPLITVEC vector by
770 * positioning the elements sharing the most frequent non-empty
771 * prefix of the distribution at the beginning of the vector.
772 *
773 * This will have the effect that the picksplit() implementation will
774 * do a better job, per preliminary tests on not-so random data.
775 */
776 struct gprefix_unions
777 {
778 text *prefix; /* a shared prefix */
779 int n; /* how many entries begins with this prefix */
780 };
781
782
783 static inline
784 text **prefix_presort(GistEntryVector *list)
785 {
2be7f8e picksplit() presort() SYGSEGV fixed
dim authored
786 GISTENTRY *ent = list->vector;
787 OffsetNumber maxoff = list->n - 1;
67b9a2f picksplit() first pass is about some kind of sorting. This commit sti…
dim authored
788 text *init = (text *) DatumGetPointer(ent[FirstOffsetNumber].key);
789 text *cur, *gp;
cf4a835 Begin main presort() loop at OffsetNumberNext(FirstOffsetNumber); and…
dim authored
790 int gplen;
67b9a2f picksplit() first pass is about some kind of sorting. This commit sti…
dim authored
791 bool found;
792
793 struct gprefix_unions max;
794 struct gprefix_unions *unions = (struct gprefix_unions *)
cf4a835 Begin main presort() loop at OffsetNumberNext(FirstOffsetNumber); and…
dim authored
795 palloc((maxoff+1) * sizeof(struct gprefix_unions));
67b9a2f picksplit() first pass is about some kind of sorting. This commit sti…
dim authored
796
cf4a835 Begin main presort() loop at OffsetNumberNext(FirstOffsetNumber); and…
dim authored
797 OffsetNumber unions_it = FirstOffsetNumber; /* unions iterator */
40f9a17 presort: reuse current union entry instead of zeroing its .n
dim authored
798 OffsetNumber i, u;
67b9a2f picksplit() first pass is about some kind of sorting. This commit sti…
dim authored
799
800 int result_it, result_it_maxes = FirstOffsetNumber;
cf4a835 Begin main presort() loop at OffsetNumberNext(FirstOffsetNumber); and…
dim authored
801 text **result = (text **)palloc((maxoff+1) * sizeof(text *));
802
803 #ifdef DEBUG_PRESORT_MAX
804 int debug_count;
805 #endif
806 #ifdef DEBUG_PRESORT_UNIONS
807 int debug_count;
808 #endif
67b9a2f picksplit() first pass is about some kind of sorting. This commit sti…
dim authored
809
810 unions[unions_it].prefix = init;
811 unions[unions_it].n = 1;
812 unions_it = OffsetNumberNext(unions_it);
813
814 max.prefix = init;
815 max.n = 1;
816
cf4a835 Begin main presort() loop at OffsetNumberNext(FirstOffsetNumber); and…
dim authored
817 #ifdef DEBUG_PRESORT_MAX
818 elog(NOTICE, " prefix_presort(): init=%s max.prefix=%s max.n=%d",
819 DatumGetCString(DirectFunctionCall1(textout,PointerGetDatum(init))),
820 DatumGetCString(DirectFunctionCall1(textout,PointerGetDatum(max.prefix))),
821 max.n);
822 #endif
823
67b9a2f picksplit() first pass is about some kind of sorting. This commit sti…
dim authored
824 /**
825 * Prepare a list of prefixes and how many time they are found.
826 */
cf4a835 Begin main presort() loop at OffsetNumberNext(FirstOffsetNumber); and…
dim authored
827 for(i = OffsetNumberNext(FirstOffsetNumber); i <= maxoff; i = OffsetNumberNext(i)) {
67b9a2f picksplit() first pass is about some kind of sorting. This commit sti…
dim authored
828 found = false;
829 cur = (text *) DatumGetPointer(ent[i].key);
2be7f8e picksplit() presort() SYGSEGV fixed
dim authored
830
67b9a2f picksplit() first pass is about some kind of sorting. This commit sti…
dim authored
831 for(u = FirstOffsetNumber; u < unions_it; u = OffsetNumberNext(u)) {
832 if( unions[u].n < 1 )
833 continue;
834
835 /**
836 * We'll need the prefix itself, so it's better to call
837 * greater_prefix_internal each time rather than
838 * prefix_contains_internal then when true
839 * greater_prefix_internal.
840 */
841 gp = greater_prefix_internal(cur, unions[u].prefix);
7d2ac3d firsts attempts towards 8.3 compliance, using VARSIZE_ANY_EXHDR()
dim authored
842 gplen = PREFIX_VARSIZE(gp);
67b9a2f picksplit() first pass is about some kind of sorting. This commit sti…
dim authored
843
cf4a835 Begin main presort() loop at OffsetNumberNext(FirstOffsetNumber); and…
dim authored
844 #ifdef DEBUG_PRESORT_GP
7d2ac3d firsts attempts towards 8.3 compliance, using VARSIZE_ANY_EXHDR()
dim authored
845 if( gplen > 0 ) {
cf4a835 Begin main presort() loop at OffsetNumberNext(FirstOffsetNumber); and…
dim authored
846 elog(NOTICE, " prefix_presort(): gplen=%2d, %s @> %s = %s",
847 gplen,
848 DatumGetCString(DirectFunctionCall1(textout,PointerGetDatum(gp))),
849 DatumGetCString(DirectFunctionCall1(textout,PointerGetDatum(cur))),
850 (prefix_contains_internal(gp, cur, true) ? "t" : "f"));
851 }
852 #endif
853
7d2ac3d firsts attempts towards 8.3 compliance, using VARSIZE_ANY_EXHDR()
dim authored
854 if( gplen > 0 ) {
5593c9d DEBUG_PRESORT_GP code has to be reached before the Assert() call
dim authored
855 Assert(prefix_contains_internal(gp, cur, true));
856 }
857
cf4a835 Begin main presort() loop at OffsetNumberNext(FirstOffsetNumber); and…
dim authored
858 if( gplen > 0 ) {
67b9a2f picksplit() first pass is about some kind of sorting. This commit sti…
dim authored
859 /**
860 * Current list entry share a common prefix with some previous
861 * analyzed list entry, update the prefix and number.
862 */
40f9a17 presort: reuse current union entry instead of zeroing its .n
dim authored
863 found = true;
864 unions[u].n += 1;
865 unions[u].prefix = gp;
67b9a2f picksplit() first pass is about some kind of sorting. This commit sti…
dim authored
866
867 /**
868 * We just updated unions, we may have to update max too.
869 */
40f9a17 presort: reuse current union entry instead of zeroing its .n
dim authored
870 if( unions[u].n > max.n ) {
871 max.prefix = unions[u].prefix;
872 max.n = unions[u].n;
cf4a835 Begin main presort() loop at OffsetNumberNext(FirstOffsetNumber); and…
dim authored
873 #ifdef DEBUG_PRESORT_MAX
874 elog(NOTICE, " prefix_presort(): max.prefix=%s max.n=%d",
875 DatumGetCString(DirectFunctionCall1(textout,PointerGetDatum(max.prefix))),
876 max.n);
877 #endif
67b9a2f picksplit() first pass is about some kind of sorting. This commit sti…
dim authored
878 }
879
880 /**
881 * break from the unions loop, we're done with it for this
882 * element.
883 */
884 break;
885 }
886 }
887 /**
888 * We're done with the unions loop, if we didn't find a common
889 * prefix we have to add the current list element to unions
890 */
891 if( !found ) {
892 unions[unions_it].prefix = cur;
893 unions[unions_it].n = 1;
894 unions_it = OffsetNumberNext(unions_it);
895 }
896 }
cf4a835 Begin main presort() loop at OffsetNumberNext(FirstOffsetNumber); and…
dim authored
897 #ifdef DEBUG_PRESORT_UNIONS
388a8a1 prefix opclass works again: picksplit() now has counts and offset rigths
dim authored
898 debug_count = 0;
67b9a2f picksplit() first pass is about some kind of sorting. This commit sti…
dim authored
899 for(u = FirstOffsetNumber; u < unions_it; u = OffsetNumberNext(u)) {
388a8a1 prefix opclass works again: picksplit() now has counts and offset rigths
dim authored
900 debug_count += unions[u].n;
901 elog(NOTICE, " prefix_presort(): unions[%s] = %d",
902 DatumGetCString(DirectFunctionCall1(textout,PointerGetDatum(unions[u].prefix))),
903 unions[u].n);
67b9a2f picksplit() first pass is about some kind of sorting. This commit sti…
dim authored
904 }
388a8a1 prefix opclass works again: picksplit() now has counts and offset rigths
dim authored
905 elog(NOTICE, " prefix_presort(): total: %d", debug_count);
cf4a835 Begin main presort() loop at OffsetNumberNext(FirstOffsetNumber); and…
dim authored
906 #endif
907
908 #ifdef DEBUG_PRESORT_MAX
909 debug_count = 0;
910 for(i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) {
911 cur = (text *) DatumGetPointer(ent[i].key);
912
913 if( prefix_contains_internal(max.prefix, cur, true) )
914 debug_count++;
915 }
916 elog(NOTICE, " prefix_presort(): max.prefix %s @> %d entries",
917 DatumGetCString(DirectFunctionCall1(textout,PointerGetDatum(max.prefix))),
918 debug_count);
67b9a2f picksplit() first pass is about some kind of sorting. This commit sti…
dim authored
919 #endif
920
921 /**
922 * We now have a list of common non-empty prefixes found on the list
923 * (unions) and kept the max entry while computing this weighted
924 * unions list.
925 *
cf4a835 Begin main presort() loop at OffsetNumberNext(FirstOffsetNumber); and…
dim authored
926 * Simple case : a common non-empty prefix is shared by all list
927 * entries.
67b9a2f picksplit() first pass is about some kind of sorting. This commit sti…
dim authored
928 */
929 if( max.n == list->n ) {
930 /**
931 * A common non-empty prefix is shared by all list entries.
932 */
933 for(i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) {
934 cur = (text *) DatumGetPointer(ent[i].key);
935 result[i] = cur;
936 }
937 return result;
938 }
939
cf4a835 Begin main presort() loop at OffsetNumberNext(FirstOffsetNumber); and…
dim authored
940 /**
941 * If we arrive here, we now have to make up the result by copying
942 * max matching elements first, then the others list entries in
943 * their original order. To do this, we reserve the first result
944 * max.n places to the max.prefix matching elements (see result_it
945 * and result_it_maxes).
946 *
947 * result_it_maxes will go from FirstOffsetNumber to max.n included,
948 * and result_it will iterate through the end of the list, that is
949 * from max.n - FirstOffsetNumber + 1 to maxoff.
950 *
951 * [a, b] contains b - a + 1 elements, hence
952 * [FirstOffsetNumber, max.n] contains max.n - FirstOffsetNumber + 1
953 * elements, whatever FirstOffsetNumber value.
954 */
955 result_it_maxes = FirstOffsetNumber;
956 result_it = OffsetNumberNext(max.n - FirstOffsetNumber + 1);
957
958 #ifdef DEBUG_PRESORT_MAX
959 elog(NOTICE, " prefix_presort(): max.prefix=%s max.n=%d result_it=%d",
960 DatumGetCString(DirectFunctionCall1(textout,PointerGetDatum(max.prefix))),
961 max.n, result_it);
962 #endif
67b9a2f picksplit() first pass is about some kind of sorting. This commit sti…
dim authored
963
964 for(i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) {
965 cur = (text *) DatumGetPointer(ent[i].key);
966
cf4a835 Begin main presort() loop at OffsetNumberNext(FirstOffsetNumber); and…
dim authored
967 #ifdef DEBUG_PRESORT_RESULT
968 elog(NOTICE, " prefix_presort(): ent[%4d] = %s <@ %s = %s => result[%4d]",
969 i,
970 DatumGetCString(DirectFunctionCall1(textout,PointerGetDatum(cur))),
971 DatumGetCString(DirectFunctionCall1(textout,PointerGetDatum(max.prefix))),
972 (prefix_contains_internal(max.prefix, cur, true) ? "t" : "f"),
973 (prefix_contains_internal(max.prefix, cur, true) ? result_it_maxes : result_it));
974 #endif
975
67b9a2f picksplit() first pass is about some kind of sorting. This commit sti…
dim authored
976 if( prefix_contains_internal(max.prefix, cur, true) ) {
977 /**
978 * cur has to go in first part of the list, as max.prefix is a
979 * prefix of it.
980 */
cf4a835 Begin main presort() loop at OffsetNumberNext(FirstOffsetNumber); and…
dim authored
981 Assert(result_it_maxes <= max.n);
67b9a2f picksplit() first pass is about some kind of sorting. This commit sti…
dim authored
982 result[result_it_maxes] = cur;
983 result_it_maxes = OffsetNumberNext(result_it_maxes);
984 }
985 else {
986 /**
987 * cur has to go at next second part position.
988 */
cf4a835 Begin main presort() loop at OffsetNumberNext(FirstOffsetNumber); and…
dim authored
989 Assert(result_it <= maxoff);
67b9a2f picksplit() first pass is about some kind of sorting. This commit sti…
dim authored
990 result[result_it] = cur;
991 result_it = OffsetNumberNext(result_it);
992 }
993 }
cf4a835 Begin main presort() loop at OffsetNumberNext(FirstOffsetNumber); and…
dim authored
994 #ifdef DEBUG_PRESORT_RESULT
995 elog(NOTICE, " prefix_presort(): result_it_maxes=%4d result_it=%4d list->n=%d maxoff=%d",
996 result_it_maxes, result_it, list->n, maxoff);
997 #endif
67b9a2f picksplit() first pass is about some kind of sorting. This commit sti…
dim authored
998 return result;
999 }
1000
1001
1002
1003 /**
0fc1320 Importing prefix operator and opclass files
dim authored
1004 * prefix picksplit implementation
1005 *
f61609e First picksplit() implementation of AndrewSN ideas, missing the preli…
dim authored
1006 * The idea is to consume the SPLITVEC vector by both its start and
1007 * end, inserting one or two items at a time depending on relative
388a8a1 prefix opclass works again: picksplit() now has counts and offset rigths
dim authored
1008 * penalty() with current ends of new vectors, or even all remaining
1009 * items at once.
0fc1320 Importing prefix operator and opclass files
dim authored
1010 *
f61609e First picksplit() implementation of AndrewSN ideas, missing the preli…
dim authored
1011 * Idea and perl test script per AndreSN with some modifications by me
1012 * (Dimitri Fontaine).
1013 *
1014 * TODO: check whether qsort() is the right first pass. Another idea
1015 * (by dim, this time) being to care first about items which non-empty
1016 * union appears the most in the SPLITVEC vector. Perl test
1017 * implementation show good results on random test data.
0fc1320 Importing prefix operator and opclass files
dim authored
1018 */
67b9a2f picksplit() first pass is about some kind of sorting. This commit sti…
dim authored
1019
0fc1320 Importing prefix operator and opclass files
dim authored
1020 PG_FUNCTION_INFO_V1(gprefix_picksplit);
1021 Datum
1022 gprefix_picksplit(PG_FUNCTION_ARGS)
1023 {
1024 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
388a8a1 prefix opclass works again: picksplit() now has counts and offset rigths
dim authored
1025 OffsetNumber maxoff = entryvec->n - 1;
0fc1320 Importing prefix operator and opclass files
dim authored
1026 GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
1027
1028 int nbytes;
f61609e First picksplit() implementation of AndrewSN ideas, missing the preli…
dim authored
1029 OffsetNumber offl, offr;
0fc1320 Importing prefix operator and opclass files
dim authored
1030 OffsetNumber *listL;
1031 OffsetNumber *listR;
f61609e First picksplit() implementation of AndrewSN ideas, missing the preli…
dim authored
1032 text *curl, *curr, *gp;
0fc1320 Importing prefix operator and opclass files
dim authored
1033 text *unionL;
1034 text *unionR;
f61609e First picksplit() implementation of AndrewSN ideas, missing the preli…
dim authored
1035
1036 /**
1037 * Keeping track of penalties to insert into ListL or ListR, for
1038 * both the leftmost and the rightmost element of the remaining
1039 * list.
1040 */
1041 float pll, plr, prl, prr;
1042
67b9a2f picksplit() first pass is about some kind of sorting. This commit sti…
dim authored
1043 /**
1044 * First pass: sort out the entryvec.
1045 */
1046 text **sorted = prefix_presort(entryvec);
0fc1320 Importing prefix operator and opclass files
dim authored
1047
388a8a1 prefix opclass works again: picksplit() now has counts and offset rigths
dim authored
1048 nbytes = (maxoff + 1) * sizeof(OffsetNumber);
0fc1320 Importing prefix operator and opclass files
dim authored
1049 listL = (OffsetNumber *) palloc(nbytes);
1050 listR = (OffsetNumber *) palloc(nbytes);
f61609e First picksplit() implementation of AndrewSN ideas, missing the preli…
dim authored
1051 v->spl_left = listL;
1052 v->spl_right = listR;
0fc1320 Importing prefix operator and opclass files
dim authored
1053 v->spl_nleft = v->spl_nright = 0;
1054
f61609e First picksplit() implementation of AndrewSN ideas, missing the preli…
dim authored
1055 offl = FirstOffsetNumber;
1056 offr = maxoff;
1057
67b9a2f picksplit() first pass is about some kind of sorting. This commit sti…
dim authored
1058 unionL = sorted[offl];
1059 unionR = sorted[offr];
f61609e First picksplit() implementation of AndrewSN ideas, missing the preli…
dim authored
1060
1061 v->spl_left[v->spl_nleft++] = offl;
1062 v->spl_right[v->spl_nright++] = offr;
1063 v->spl_left = listL;
0fc1320 Importing prefix operator and opclass files
dim authored
1064 v->spl_right = listR;
1065
388a8a1 prefix opclass works again: picksplit() now has counts and offset rigths
dim authored
1066 offl = OffsetNumberNext(offl);
1067 offr = OffsetNumberPrev(offr);
1068
f61609e First picksplit() implementation of AndrewSN ideas, missing the preli…
dim authored
1069 for(; offl < offr; offl = OffsetNumberNext(offl), offr = OffsetNumberPrev(offr)) {
67b9a2f picksplit() first pass is about some kind of sorting. This commit sti…
dim authored
1070 curl = sorted[offl];
1071 curr = sorted[offr];
1072
1073 Assert(curl != NULL && curr != NULL);
f61609e First picksplit() implementation of AndrewSN ideas, missing the preli…
dim authored
1074
1075 pll = prefix_penalty_internal(unionL, curl);
1076 plr = prefix_penalty_internal(unionR, curl);
1077 prl = prefix_penalty_internal(unionL, curr);
1078 prr = prefix_penalty_internal(unionR, curr);
1079
1080 if( pll <= plr && prl >= prr ) {
1081 /**
1082 * curl should go to left and curr to right, unless they share
1083 * a non-empty common prefix, in which case we place both curr
1084 * and curl on the same side. Arbitrarily the left one.
1085 */
1086 if( pll == plr && prl == prr ) {
1087 gp = greater_prefix_internal(curl, curr);
7d2ac3d firsts attempts towards 8.3 compliance, using VARSIZE_ANY_EXHDR()
dim authored
1088 if( PREFIX_VARSIZE(gp) > 0 ) {
f61609e First picksplit() implementation of AndrewSN ideas, missing the preli…
dim authored
1089 unionL = greater_prefix_internal(unionL, gp);
1090 v->spl_left[v->spl_nleft++] = offl;
1091 v->spl_left[v->spl_nleft++] = offr;
1092 continue;
1093 }
1094 }
1095 /**
388a8a1 prefix opclass works again: picksplit() now has counts and offset rigths
dim authored
1096 * here pll <= plr and prl >= prr and (pll != plr || prl != prr)
f61609e First picksplit() implementation of AndrewSN ideas, missing the preli…
dim authored
1097 */
1098 unionL = greater_prefix_internal(unionL, curl);
1099 unionR = greater_prefix_internal(unionR, curr);
1100 v->spl_left[v->spl_nleft++] = offl;
1101 v->spl_right[v->spl_nright++] = offr;
1102 }
1103 else if( pll > plr && prl >= prr ) {
1104 unionR = greater_prefix_internal(unionR, curr);
1105 v->spl_right[v->spl_nright++] = offr;
1106 }
1107 else if( pll <= plr && prl < prr ) {
1108 /**
1109 * Current leftmost entry is added to listL
1110 */
1111 unionL = greater_prefix_internal(unionL, curl);
1112 v->spl_left[v->spl_nleft++] = offl;
1113 }
1114 else if( (pll - plr) < (prr - prl) ) {
1115 /**
1116 * All entries still in the list go into listL
1117 */
388a8a1 prefix opclass works again: picksplit() now has counts and offset rigths
dim authored
1118 for(; offl <= maxoff; offl = OffsetNumberNext(offl)) {
67b9a2f picksplit() first pass is about some kind of sorting. This commit sti…
dim authored
1119 curl = sorted[offl];
f61609e First picksplit() implementation of AndrewSN ideas, missing the preli…
dim authored
1120 unionL = greater_prefix_internal(unionL, curl);
1121 v->spl_left[v->spl_nleft++] = offl;
1122 }
1123 }
1124 else {
1125 /**
1126 * All entries still in the list go into listR
1127 */
388a8a1 prefix opclass works again: picksplit() now has counts and offset rigths
dim authored
1128 for(; offl <= maxoff; offl = OffsetNumberNext(offl)) {
67b9a2f picksplit() first pass is about some kind of sorting. This commit sti…
dim authored
1129 curl = sorted[offl];
f61609e First picksplit() implementation of AndrewSN ideas, missing the preli…
dim authored
1130 unionR = greater_prefix_internal(unionR, curl);
1131 v->spl_right[v->spl_nright++] = offl;
1132 }
1133 }
0fc1320 Importing prefix operator and opclass files
dim authored
1134 }
388a8a1 prefix opclass works again: picksplit() now has counts and offset rigths
dim authored
1135
1136 /**
1137 * The for loop continues while offl < offr. If maxoff is odd, it
1138 * could be that there's a last value to process. Here we choose
1139 * where to add it.
1140 */
f61609e First picksplit() implementation of AndrewSN ideas, missing the preli…
dim authored
1141 if( offl == offr ) {
67b9a2f picksplit() first pass is about some kind of sorting. This commit sti…
dim authored
1142 curl = sorted[offl];
f61609e First picksplit() implementation of AndrewSN ideas, missing the preli…
dim authored
1143 pll = prefix_penalty_internal(unionL, curl);
1144 plr = prefix_penalty_internal(unionR, curl);
1145
1146 if( pll < plr || (pll == plr && v->spl_nleft < v->spl_nright) ) {
388a8a1 prefix opclass works again: picksplit() now has counts and offset rigths
dim authored
1147 curl = sorted[offl];
1148 unionL = greater_prefix_internal(unionL, curl);
1149 v->spl_left[v->spl_nleft++] = offl;
f61609e First picksplit() implementation of AndrewSN ideas, missing the preli…
dim authored
1150 }
1151 else {
388a8a1 prefix opclass works again: picksplit() now has counts and offset rigths
dim authored
1152 curl = sorted[offl];
1153 unionR = greater_prefix_internal(unionR, curl);
1154 v->spl_right[v->spl_nright++] = offl;
f61609e First picksplit() implementation of AndrewSN ideas, missing the preli…
dim authored
1155 }
0fc1320 Importing prefix operator and opclass files
dim authored
1156 }
1157
1158 v->spl_ldatum = PointerGetDatum(unionL);
1159 v->spl_rdatum = PointerGetDatum(unionR);
3f678f4 First version working, some more doc, avoiding gcc complaints, better…
dim authored
1160
388a8a1 prefix opclass works again: picksplit() now has counts and offset rigths
dim authored
1161 /**
1162 * All read entries (maxoff) should have make it to the
1163 * GIST_SPLITVEC return value.
1164 */
1165 Assert(maxoff = v->spl_nleft+v->spl_nright);
1166
3f678f4 First version working, some more doc, avoiding gcc complaints, better…
dim authored
1167 #ifdef DEBUG
40f9a17 presort: reuse current union entry instead of zeroing its .n
dim authored
1168 elog(NOTICE, "gprefix_picksplit(): entryvec->n=%4d maxoff=%4d l=%4d r=%4d l+r=%4d unionL=%s unionR=%s",
2be7f8e picksplit() presort() SYGSEGV fixed
dim authored
1169 entryvec->n, maxoff, v->spl_nleft, v->spl_nright, v->spl_nleft+v->spl_nright,
67b9a2f picksplit() first pass is about some kind of sorting. This commit sti…
dim authored
1170 DatumGetCString(DirectFunctionCall1(textout,PointerGetDatum(unionL))),
2be7f8e picksplit() presort() SYGSEGV fixed
dim authored
1171 DatumGetCString(DirectFunctionCall1(textout,PointerGetDatum(unionR))));
3f678f4 First version working, some more doc, avoiding gcc complaints, better…
dim authored
1172 #endif
0fc1320 Importing prefix operator and opclass files
dim authored
1173
1174 PG_RETURN_POINTER(v);
1175 }
1176
1177 /**
1178 * Prefix union should return the greatest common prefix.
1179 */
1180 PG_FUNCTION_INFO_V1(gprefix_union);
1181 Datum
1182 gprefix_union(PG_FUNCTION_ARGS)
1183 {
1184 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
1185 GISTENTRY *ent = entryvec->vector;
1186
1187 text *out, *tmp, *gp;
1188 int numranges, i = 0;
1189
1190 numranges = entryvec->n;
1191 tmp = (text *) DatumGetPointer(ent[0].key);
1192 out = tmp;
1193
1194 if( numranges == 1 ) {
1195 /**
1196 * We need to return a palloc()ed copy of ent[0].key (==tmp)
1197 */
1198 out = DatumGetTextPCopy(PointerGetDatum(tmp));
2be7f8e picksplit() presort() SYGSEGV fixed
dim authored
1199 #ifdef DEBUG_UNION
0fc1320 Importing prefix operator and opclass files
dim authored
1200 elog(NOTICE, "gprefix_union(%s) == %s",
1201 DatumGetCString(DirectFunctionCall1(textout,PointerGetDatum(tmp))),
1202 DatumGetCString(DirectFunctionCall1(textout,PointerGetDatum(out))));
1203 #endif
1204 PG_RETURN_POINTER(out);
1205 }
1206
1207 for (i = 1; i < numranges; i++) {
1208 tmp = (text *) DatumGetPointer(ent[i].key);
1209 gp = greater_prefix_internal(out, tmp);
2be7f8e picksplit() presort() SYGSEGV fixed
dim authored
1210 #ifdef DEBUG_UNION
0fc1320 Importing prefix operator and opclass files
dim authored
1211 elog(NOTICE, "gprefix_union: gp(%s, %s) == %s",
1212 DatumGetCString(DirectFunctionCall1(textout,PointerGetDatum(out))),
1213 DatumGetCString(DirectFunctionCall1(textout,PointerGetDatum(tmp))),
1214 DatumGetCString(DirectFunctionCall1(textout,PointerGetDatum(gp))));
1215 #endif
1216 out = gp;
1217 }
1218
2be7f8e picksplit() presort() SYGSEGV fixed
dim authored
1219 #ifdef DEBUG_UNION
0fc1320 Importing prefix operator and opclass files
dim authored
1220 elog(NOTICE, "gprefix_union: %s",
1221 DatumGetCString(DirectFunctionCall1(textout,PointerGetDatum(out))));
1222 #endif
1223
1224 PG_RETURN_POINTER(out);
1225 }
1226
1227 /**
1228 * GiST Compress and Decompress methods for prefix
1229 * do not do anything.
1230 */
1231 PG_FUNCTION_INFO_V1(gprefix_compress);
1232 Datum
1233 gprefix_compress(PG_FUNCTION_ARGS)
1234 {
1235 PG_RETURN_POINTER(PG_GETARG_POINTER(0));
1236 }
1237
1238 PG_FUNCTION_INFO_V1(gprefix_decompress);
1239 Datum
1240 gprefix_decompress(PG_FUNCTION_ARGS)
1241 {
1242 PG_RETURN_POINTER(PG_GETARG_POINTER(0));
1243 }
1244
1245 /**
1246 * Equality methods
1247 */
1248 PG_FUNCTION_INFO_V1(gprefix_same);
1249 Datum
1250 gprefix_same(PG_FUNCTION_ARGS)
1251 {
1252 text *v1 = (text *) PG_GETARG_POINTER(0);
1253 text *v2 = (text *) PG_GETARG_POINTER(1);
1254 bool *result = (bool *) PG_GETARG_POINTER(2);
1255
1256 *result = DirectFunctionCall2(texteq,
1257 PointerGetDatum(v1),
1258 PointerGetDatum(v2));
1259
1260 PG_RETURN_POINTER(result);
1261 }
1262
Something went wrong with that request. Please try again.