Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

1915 lines (1644 sloc) 50.612 kb
/**
* Prefix opclass allows to efficiently index a prefix table with
* GiST.
*
* More common use case is telephony prefix searching for cost or
* routing.
*
* Many thanks to AndrewSN, who provided great amount of help in the
* writting of this opclass, on the PostgreSQL internals, GiST inner
* working and prefix search analyses.
*
* $Id: prefix.c,v 1.54 2009/10/06 09:55:32 dim Exp $
*/
#include <stdio.h>
#include "postgres.h"
#include "access/gist.h"
#include "access/skey.h"
#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/builtins.h"
#include "libpq/pqformat.h"
#include <math.h>
/**
* We use those DEBUG defines in the code, uncomment them to get very
* verbose output.
*
#define DEBUG
#define DEBUG_UNION
#define DEBUG_INTER
#define DEBUG_PENALTY
#define DEBUG_PICKSPLIT
#define DEBUG_CONSISTENT
#define DEBUG_PRESORT_GP
#define DEBUG_PRESORT_MAX
#define DEBUG_PRESORT_UNIONS
#define DEBUG_PRESORT_RESULT
#define DEBUG_PR_IN
#define DEBUG_PR_NORMALIZE
#define DEBUG_MAKE_VARLENA
*/
/**
* This code has only been tested with PostgreSQL 8.2 and 8.3, and a 8.1
* backport has been requested.
*
* 8.1 didn't have PG_VERSION_NUM, so we'll avoid ugly Makefiles hack by
* saying that if we don't have PG_VERSION_NUM, it must be 8.1
*/
#ifdef PG_VERSION_NUM
#define PG_MAJOR_VERSION (PG_VERSION_NUM / 100)
#else
#define PG_MAJOR_VERSION PREFIX_PGVER
#endif
#if PG_MAJOR_VERSION != 801 && PG_MAJOR_VERSION != 802 \
&& PG_MAJOR_VERSION != 803 && PG_MAJOR_VERSION != 804 \
&& PG_MAJOR_VERSION != 900 && PG_MAJOR_VERSION != 901
#error "Unknown or unsupported postgresql version"
#endif
/* PG_MODULE_MAGIC was introduced in 8.2. */
#if PG_MAJOR_VERSION >= 802
PG_MODULE_MAGIC;
#endif
/**
* Define our own varlena size macro depending on PGVER
*/
#if PG_VERSION_NUM / 100 <= 802
#define PREFIX_VARSIZE(x) (VARSIZE(x) - VARHDRSZ)
#define PREFIX_VARDATA(x) (VARDATA(x))
#define PREFIX_PG_GETARG_TEXT(x) (PG_GETARG_TEXT_P(x))
#define PREFIX_SET_VARSIZE(p, s) (VARATT_SIZEP(p) = s)
#else
#define PREFIX_VARSIZE(x) (VARSIZE_ANY_EXHDR(x))
#define PREFIX_VARDATA(x) (VARDATA_ANY(x))
#define PREFIX_PG_GETARG_TEXT(x) (PG_GETARG_TEXT_PP(x))
#define PREFIX_SET_VARSIZE(p, s) (SET_VARSIZE(p, s))
#endif
/**
* prefix_range datatype, varlena structure
*/
typedef struct {
char first;
char last;
char prefix[1]; /* this is a varlena structure, data follows */
} prefix_range;
enum pr_delimiters_t {
PR_OPEN = '[',
PR_CLOSE = ']',
PR_SEP = '-'
} pr_delimiters;
/**
* prefix_range input/output functions and operators
*/
Datum prefix_range_init(PG_FUNCTION_ARGS);
Datum prefix_range_in(PG_FUNCTION_ARGS);
Datum prefix_range_out(PG_FUNCTION_ARGS);
Datum prefix_range_recv(PG_FUNCTION_ARGS);
Datum prefix_range_send(PG_FUNCTION_ARGS);
Datum prefix_range_cast_to_text(PG_FUNCTION_ARGS);
Datum prefix_range_cast_from_text(PG_FUNCTION_ARGS);
Datum prefix_range_length(PG_FUNCTION_ARGS);
Datum prefix_range_eq(PG_FUNCTION_ARGS);
Datum prefix_range_neq(PG_FUNCTION_ARGS);
Datum prefix_range_lt(PG_FUNCTION_ARGS);
Datum prefix_range_le(PG_FUNCTION_ARGS);
Datum prefix_range_gt(PG_FUNCTION_ARGS);
Datum prefix_range_ge(PG_FUNCTION_ARGS);
Datum prefix_range_cmp(PG_FUNCTION_ARGS);
Datum prefix_range_overlaps(PG_FUNCTION_ARGS);
Datum prefix_range_contains(PG_FUNCTION_ARGS);
Datum prefix_range_contains_strict(PG_FUNCTION_ARGS);
Datum prefix_range_contained_by(PG_FUNCTION_ARGS);
Datum prefix_range_contained_by_strict(PG_FUNCTION_ARGS);
Datum prefix_range_union(PG_FUNCTION_ARGS);
Datum prefix_range_inter(PG_FUNCTION_ARGS);
#define DatumGetPrefixRange(X) ((prefix_range *) PREFIX_VARDATA(DatumGetPointer(X)) )
#define PrefixRangeGetDatum(X) PointerGetDatum(make_varlena(X))
#define PG_GETARG_PREFIX_RANGE_P(n) DatumGetPrefixRange(PG_DETOAST_DATUM(PG_GETARG_DATUM(n)))
#define PG_RETURN_PREFIX_RANGE_P(x) return PrefixRangeGetDatum(x)
/**
* Used by prefix_contains_internal and pr_contains_prefix.
*
* plen is the length of string p, qlen the length of string q, the
* caller are dealing with either text * or char * and its their
* responsabolity to use either strlen() or PREFIX_VARSIZE()
*/
static inline
bool __prefix_contains(char *p, char *q, int plen, int qlen) {
if(qlen < plen )
return false;
return memcmp(p, q, plen) == 0;
}
static inline
char *__greater_prefix(char *a, char *b, int alen, int blen)
{
int i = 0;
char *result = NULL;
for(i=0; i<alen && i<blen && a[i] == b[i]; i++);
/* i is the last common char position in a, or 0 */
if( i == 0 ) {
/**
* return ""
*/
result = (char *)palloc(sizeof(char));
}
else {
result = (char *)palloc((i+1) * sizeof(char));
memcpy(result, a, i);
}
result[i] = 0;
return result;
}
/**
* Helper function which builds a prefix_range from a prefix, a first
* and a last component, making a copy of the prefix string.
*/
static inline
prefix_range *build_pr(const char *prefix, char first, char last) {
int s = strlen(prefix) + 1;
prefix_range *pr = palloc(sizeof(prefix_range) + s);
memcpy(pr->prefix, prefix, s);
pr->first = first;
pr->last = last;
#ifdef DEBUG_PR_IN
elog(NOTICE,
"build_pr: pr->prefix = '%s', pr->first = %d, pr->last = %d",
pr->prefix, pr->first, pr->last);
#endif
return pr;
}
/**
* Normalize a prefix_range. Two cases are handled:
*
* abc[x-x] is rewritten abcx
* abc[x-y] is rewritten abc[y-x] when y < x
*/
static inline
prefix_range *pr_normalize(prefix_range *a) {
char tmpswap;
char *prefix;
prefix_range *pr = build_pr(a->prefix, a->first, a->last);
if( pr->first == pr->last ) {
int s = strlen(pr->prefix)+2;
prefix = (char *)palloc(s);
memcpy(prefix, pr->prefix, s-2);
prefix[s-2] = pr->first;
prefix[s-1] = 0;
#ifdef DEBUG_PR_NORMALIZE
elog(NOTICE, "prefix_range %s %s %s", a->prefix, pr->prefix, prefix);
#endif
pfree(pr);
pr = build_pr(prefix, 0, 0);
}
else if( pr->first > pr->last ) {
tmpswap = pr->first;
pr->first = pr->last;
pr->last = tmpswap;
}
return pr;
}
/*
* Init a prefix value from the prefix_range(text, text, text)
* function
*/
static inline
prefix_range *make_prefix_range(char *str, char first, char last) {
int len;
prefix_range *pr = NULL;
if( str != NULL )
pr = build_pr(str, first, last);
else
pr = build_pr("", first, last);
len = strlen(pr->prefix);
memcpy(pr->prefix, str, len);
pr->prefix[len] = 0;
return pr_normalize(pr);
}
/**
* First, the input reader. A prefix range will have to respect the
* following regular expression: .*([[].-.[]])?
*
* examples : 123[4-6], [1-3], 234, 01[] --- last one not covered by
* regexp.
*/
static inline
prefix_range *pr_from_str(char *str) {
prefix_range *pr = NULL;
char *prefix = (char *)palloc(strlen(str)+1);
char current = 0, previous = 0;
bool opened = false;
bool closed = false;
bool sawsep = false;
char *ptr, *prefix_ptr = prefix;
bzero(prefix, strlen(str)+1);
for(ptr=str; *ptr != 0; ptr++) {
previous = current;
current = *ptr;
if( !opened && current != PR_OPEN )
*prefix_ptr++ = current;
#ifdef DEBUG_PR_IN
elog(NOTICE, "prefix_range previous='%c' current='%c' prefix='%s'",
(previous?previous:' '), current, prefix);
#endif
switch( current ) {
case PR_OPEN:
if( opened ) {
#ifdef DEBUG_PR_IN
elog(ERROR,
"prefix_range %s contains several %c", str, PR_OPEN);
#endif
return NULL;
}
opened = true;
pr = build_pr(prefix, 0, 0);
break;
case PR_SEP:
if( opened ) {
if( closed ) {
#ifdef DEBUG_PR_IN
elog(ERROR,
"prefix_range %s contains trailing character", str);
#endif
return NULL;
}
sawsep = true;
if( previous == PR_OPEN ) {
#ifdef DEBUG_PR_IN
elog(ERROR,
"prefix_range %s has separator following range opening, without data", str);
#endif
return NULL;
}
pr->first = previous;
}
break;
case PR_CLOSE:
if( !opened ) {
#ifdef DEBUG_PR_IN
elog(ERROR,
"prefix_range %s closes a range which is not opened ", str);
#endif
return NULL;
}
if( closed ) {
#ifdef DEBUG_PR_IN
elog(ERROR,
"prefix_range %s contains several %c", str, PR_CLOSE);
#endif
return NULL;
}
closed = true;
if( sawsep ) {
if( previous == PR_SEP ) {
#ifdef DEBUG_PR_IN
elog(ERROR,
"prefix_range %s has a closed range without last bound", str);
#endif
return NULL;
}
pr->last = previous;
}
else {
if( previous != PR_OPEN ) {
#ifdef DEBUG_PR_IN
elog(ERROR,
"prefix_range %s has a closing range without separator", str);
#endif
return NULL;
}
}
break;
default:
if( closed ) {
#ifdef DEBUG_PR_IN
elog(ERROR,
"prefix_range %s contains trailing characters", str);
#endif
return NULL;
}
break;
}
}
if( ! opened ) {
pr = build_pr(prefix, 0, 0);
}
if( opened && !closed ) {
#ifdef DEBUG_PR_IN
elog(ERROR, "prefix_range %s opens a range but does not close it", str);
#endif
return NULL;
}
pr = pr_normalize(pr);
#ifdef DEBUG_PR_IN
if( pr != NULL ) {
if( pr->first && pr->last )
elog(NOTICE,
"prefix_range %s: prefix = '%s', first = '%c', last = '%c'",
str, pr->prefix, pr->first, pr->last);
else
elog(NOTICE,
"prefix_range %s: prefix = '%s', no first nor last",
str, pr->prefix);
}
#endif
return pr;
}
static inline
struct varlena *make_varlena(prefix_range *pr) {
struct varlena *vdat;
int size;
if (pr != NULL) {
size = sizeof(prefix_range) + ((strlen(pr->prefix)+1)*sizeof(char)) + VARHDRSZ;
vdat = palloc(size);
PREFIX_SET_VARSIZE(vdat, size);
memcpy(VARDATA(vdat), pr, (size - VARHDRSZ));
#ifdef DEBUG_MAKE_VARLENA
elog(NOTICE, "make varlena: size=%d varsize=%d compressed=%c external=%c %s[%c-%c] %s[%c-%c]",
size,
PREFIX_VARSIZE(vdat),
(VARATT_IS_COMPRESSED(vdat) ? 't' : 'f'),
(VARATT_IS_EXTERNAL(vdat) ? 't' : 'f'),
pr->prefix,
(pr->first != 0 ? pr->first : ' '),
(pr->last != 0 ? pr->last : ' '),
((prefix_range *)VARDATA(vdat))->prefix,
(((prefix_range *)VARDATA(vdat))->first ? ((prefix_range *)VARDATA(vdat))->first : ' '),
(((prefix_range *)VARDATA(vdat))->last ? ((prefix_range *)VARDATA(vdat))->last : ' '));
#endif
return vdat;
}
return NULL;
}
/*
* Allow users to use length(prefix) rather than length(prefix::text), and
* while at it, provides an implementation which won't count the displaying
* artifacts that are the [] and -.
*/
static inline
int pr_length(prefix_range *pr) {
int len = strlen(pr->prefix);
if( pr->first != 0 )
len += 1;
if( pr->last != 0 )
len += 1;
return len;
}
static inline
bool pr_eq(prefix_range *a, prefix_range *b) {
int sa = strlen(a->prefix);
int sb = strlen(b->prefix);
return sa == sb
&& memcmp(a->prefix, b->prefix, sa) == 0
&& a->first == b->first
&& a->last == b->last;
}
/*
* We invent a prefix_range ordering for convenience, but that's
* dangerous. Use the BTree opclass at your own risk.
*
* On the other hand, when your routing table does contain pretty static
* data and you test it carefully or know it will fit into the ordering
* simplification, you're good to go.
*
* Baring bug, the constraint is to have non-overlapping data.
*/
/*static inline
bool pr_lt(prefix_range *a, prefix_range *b, bool eqval) {
*/
static inline
int pr_cmp(prefix_range *a, prefix_range *b) {
int cmp = 0;
int alen = strlen(a->prefix);
int blen = strlen(b->prefix);
int mlen = alen; /* minimum length */
char *p = a->prefix;
char *q = b->prefix;
/*
* First case, common prefix length
*/
if( alen == blen ) {
cmp = memcmp(p, q, alen);
/* Uncommon prefix, easy to compare */
if( cmp != 0 )
return cmp;
/* Common prefix, check for (sub)ranges */
else
return (a->first == b->first) ? (a->last - b->last) : (a->first - b->first);
}
/* For memcmp() safety, we need the minimum length */
if( mlen > blen )
mlen = blen;
/*
* Don't forget we may have [x-y] prefix style, that's empty prefix, only range.
*/
if( alen == 0 && a->first != 0 ) {
/* return (eqval ? (a->first <= q[0]) : (a->first < q[0])); */
return a->first - q[0];
}
else if( blen == 0 && b->first != 0 ) {
/* return (eqval ? (p[0] <= b->first) : (p[0] < b->first)); */
return p[0] - b->first;
}
/*
* General case
*
* When memcmp() on the shorter of p and q returns 0, that means they
* share a common prefix: avoid to say that '93' < '9377' and '9377' <
* '93'.
*/
cmp = memcmp(p, q, mlen);
if( cmp == 0 )
/*
* we are comparing e.g. '1' and '12' (the shorter contains the
* smaller), so let's pretend '12' < '1' as it contains less elements.
*/
return (alen == mlen) ? 1 : -1;
return cmp;
}
static inline
bool pr_lt(prefix_range *a, prefix_range *b, bool eqval) {
int cmp = pr_cmp(a, b);
return eqval ? cmp <= 0 : cmp < 0;
}
static inline
bool pr_gt(prefix_range *a, prefix_range *b, bool eqval) {
int cmp = pr_cmp(a, b);
return eqval ? cmp >= 0 : cmp > 0;
}
static inline
bool pr_contains(prefix_range *left, prefix_range *right, bool eqval) {
int sl;
int sr;
bool left_prefixes_right;
if( pr_eq(left, right) )
return eqval;
sl = strlen(left->prefix);
sr = strlen(right->prefix);
if( sr < sl )
return false;
left_prefixes_right = memcmp(left->prefix, right->prefix, sl) == 0;
if( left_prefixes_right ) {
if( sl == sr )
return left->first == 0 ||
(left->first <= right->first && left->last >= right->last);
return left->first == 0 ||
(left->first <= right->prefix[sl] && right->prefix[sl] <= left->last);
}
return false;
}
/**
* does a given prefix_range includes a given prefix?
*/
static inline
bool pr_contains_prefix(prefix_range *pr, text *query, bool eqval) {
int plen = strlen(pr->prefix);
int qlen = PREFIX_VARSIZE(query);
char *p = pr->prefix;
char *q = (char *)PREFIX_VARDATA(query);
if( __prefix_contains(p, q, plen, qlen) ) {
if( pr->first == 0 || qlen == plen ) {
return eqval;
}
/**
* __prefix_contains() is true means qlen >= plen, and previous
* test ensures qlen != plen, we hence assume qlen > plen.
*/
Assert(qlen > plen);
return pr-> first <= q[plen] && q[plen] <= pr->last;
}
return false;
}
static
prefix_range *pr_union(prefix_range *a, prefix_range *b) {
prefix_range *res = NULL;
int alen = strlen(a->prefix);
int blen = strlen(b->prefix);
char *gp = NULL;
int gplen;
char min, max;
if( 0 == alen && 0 == blen ) {
res = build_pr("",
a->first <= b->first ? a->first : b->first,
a->last >= b->last ? a->last : b->last);
return pr_normalize(res);
}
gp = __greater_prefix(a->prefix, b->prefix, alen, blen);
gplen = strlen(gp);
if( gplen == 0 ) {
res = build_pr("", 0, 0);
if( alen > 0 && blen > 0 ) {
res->first = a->prefix[0];
res->last = b->prefix[0];
}
else if( alen == 0 ) {
res->first = a->first <= b->prefix[0] ? a->first : b->prefix[0];
res->last = a->last >= b->prefix[0] ? a->last : b->prefix[0];
}
else if( blen == 0 ) {
res->first = b->first <= a->prefix[0] ? b->first : a->prefix[0];
res->last = b->last >= a->prefix[0] ? b->last : a->prefix[0];
}
}
else {
res = build_pr(gp, 0, 0);
if( gplen == alen && alen == blen ) {
res->first = a->first <= b->first ? a->first : b->first;
res->last = a->last >= b->last ? a->last : b->last;
}
else if( gplen == alen ) {
Assert(alen < blen);
res->first = a->first <= b->prefix[alen] ? a->first : b->prefix[alen];
res->last = a->last >= b->prefix[alen] ? a->last : b->prefix[alen];
}
else if( gplen == blen ) {
Assert(blen < alen);
res->first = b->first <= a->prefix[blen] ? b->first : a->prefix[blen];
res->last = b->last >= a->prefix[blen] ? b->last : a->prefix[blen];
}
else {
Assert(gplen < alen && gplen < blen);
min = a->prefix[gplen];
max = b->prefix[gplen];
if( min > max ) {
min = b->prefix[gplen];
max = a->prefix[gplen];
}
res->first = min;
res->last = max;
#ifdef DEBUG_UNION
elog(NOTICE, "union a: %s %d %d", a->prefix, a->first, a->last);
elog(NOTICE, "union b: %s %d %d", b->prefix, b->first, b->last);
elog(NOTICE, "union r: %s %d %d", res->prefix, res->first, res->last);
#endif
}
}
return pr_normalize(res);
}
static inline
prefix_range *pr_inter(prefix_range *a, prefix_range *b) {
prefix_range *res = NULL;
int alen = strlen(a->prefix);
int blen = strlen(b->prefix);
char *gp = NULL;
int gplen;
if( 0 == alen && 0 == blen ) {
res = build_pr("",
a->first > b->first ? a->first : b->first,
a->last < b->last ? a->last : b->last);
return pr_normalize(res);
}
gp = __greater_prefix(a->prefix, b->prefix, alen, blen);
gplen = strlen(gp);
if( gplen != alen && gplen != blen ) {
return build_pr("", 0, 0);
}
if( gplen == alen && 0 == alen ) {
if( a->first <= b->prefix[0] && b->prefix[0] <= a->last ) {
res = build_pr(b->prefix, b->first, b->last);
}
else
res = build_pr("", 0, 0);
}
else if( gplen == blen && 0 == blen ) {
if( b->first <= a->prefix[0] && a->prefix[0] <= b->last ) {
res = build_pr(a->prefix, a->first, a->last);
}
else
res = build_pr("", 0, 0);
}
else if( gplen == alen && alen == blen ) {
res = build_pr(gp,
a->first > b->first ? a->first : b->first,
a->last > b->last ? a->last : b->last);
#ifdef DEBUG_INTER
elog(NOTICE, "inter a: %s %d %d", a->prefix, a->first, a->last);
elog(NOTICE, "inter b: %s %d %d", b->prefix, b->first, b->last);
elog(NOTICE, "inter r: %s %d %d", res->prefix, res->first, res->last);
#endif
}
else if( gplen == alen ) {
Assert(gplen < blen);
res = build_pr(b->prefix, b->first, b->last);
}
else if( gplen == blen ) {
Assert(gplen < alen);
res = build_pr(a->prefix, a->first, a->last);
}
return pr_normalize(res);
}
/**
* true if ranges have at least one common element
*/
static inline
bool pr_overlaps(prefix_range *a, prefix_range *b) {
prefix_range *inter = pr_inter(a, b);
return strlen(inter->prefix) > 0 || (inter->first != 0 && inter->last != 0);
}
PG_FUNCTION_INFO_V1(prefix_range_init);
Datum
prefix_range_init(PG_FUNCTION_ARGS)
{
text *txt = PG_GETARG_TEXT_P(0);
char *str =
DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(txt)));
text *t_first = PG_GETARG_TEXT_P(1);
char *c_first =
DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(t_first)));
text *t_last = PG_GETARG_TEXT_P(2);
char *c_last =
DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(t_last)));
int flen = t_first == NULL ? 0 : strlen(c_first);
int llen = t_last == NULL ? 0 : strlen(c_last);
char first, last;
if( flen > 1 || llen > 1 )
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("prefix_range first and last must be at most 1 char long.")
));
first = flen == 0 ? 0 : c_first[0];
last = llen == 0 ? 0 : c_last[0];
PG_RETURN_PREFIX_RANGE_P(make_prefix_range(str, first, last));
}
PG_FUNCTION_INFO_V1(prefix_range_in);
Datum
prefix_range_in(PG_FUNCTION_ARGS)
{
char *str = PG_GETARG_CSTRING(0);
prefix_range *pr = pr_from_str(str);
if (pr != NULL) {
PG_RETURN_PREFIX_RANGE_P(pr);
}
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid prefix_range value: \"%s\"", str)));
PG_RETURN_NULL();
}
PG_FUNCTION_INFO_V1(prefix_range_out);
Datum
prefix_range_out(PG_FUNCTION_ARGS)
{
prefix_range *pr = PG_GETARG_PREFIX_RANGE_P(0);
char *out = NULL;
if( pr->first ) {
out = (char *)palloc((strlen(pr->prefix)+6) * sizeof(char));
sprintf(out, "%s[%c-%c]", pr->prefix, pr->first, pr->last);
}
else {
out = (char *)palloc((strlen(pr->prefix)+1) * sizeof(char));
sprintf(out, "%s", pr->prefix);
}
PG_RETURN_CSTRING(out);
}
PG_FUNCTION_INFO_V1(prefix_range_recv);
Datum
prefix_range_recv(PG_FUNCTION_ARGS)
{
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
const char *first = pq_getmsgbytes(buf, 1);
const char *last = pq_getmsgbytes(buf, 1);
const char *prefix = pq_getmsgstring(buf);
prefix_range *pr = build_pr(prefix, *first, *last);
pq_getmsgend(buf);
PG_RETURN_PREFIX_RANGE_P(pr);
}
PG_FUNCTION_INFO_V1(prefix_range_send);
Datum
prefix_range_send(PG_FUNCTION_ARGS)
{
prefix_range *pr = PG_GETARG_PREFIX_RANGE_P(0);
StringInfoData buf;
pq_begintypsend(&buf);
pq_sendbyte(&buf, pr->first);
pq_sendbyte(&buf, pr->last);
pq_sendstring(&buf, pr->prefix);
PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
}
PG_FUNCTION_INFO_V1(prefix_range_cast_from_text);
Datum
prefix_range_cast_from_text(PG_FUNCTION_ARGS)
{
text *txt = PG_GETARG_TEXT_P(0);
Datum cstring = DirectFunctionCall1(textout, PointerGetDatum(txt));
return DirectFunctionCall1(prefix_range_in, cstring);
}
PG_FUNCTION_INFO_V1(prefix_range_cast_to_text);
Datum
prefix_range_cast_to_text(PG_FUNCTION_ARGS)
{
prefix_range *pr = PG_GETARG_PREFIX_RANGE_P(0);
Datum cstring;
text *out;
if (pr != NULL) {
cstring = DirectFunctionCall1(prefix_range_out, PrefixRangeGetDatum(pr));
out = (text *)DirectFunctionCall1(textin, cstring);
PG_RETURN_TEXT_P(out);
}
PG_RETURN_NULL();
}
PG_FUNCTION_INFO_V1(prefix_range_length);
Datum
prefix_range_length(PG_FUNCTION_ARGS)
{
PG_RETURN_INT32( pr_length(PG_GETARG_PREFIX_RANGE_P(0)) );
}
PG_FUNCTION_INFO_V1(prefix_range_eq);
Datum
prefix_range_eq(PG_FUNCTION_ARGS)
{
PG_RETURN_BOOL( pr_eq(PG_GETARG_PREFIX_RANGE_P(0),
PG_GETARG_PREFIX_RANGE_P(1)) );
}
PG_FUNCTION_INFO_V1(prefix_range_neq);
Datum
prefix_range_neq(PG_FUNCTION_ARGS)
{
PG_RETURN_BOOL( ! pr_eq(PG_GETARG_PREFIX_RANGE_P(0),
PG_GETARG_PREFIX_RANGE_P(1)) );
}
PG_FUNCTION_INFO_V1(prefix_range_lt);
Datum
prefix_range_lt(PG_FUNCTION_ARGS)
{
PG_RETURN_BOOL( pr_lt(PG_GETARG_PREFIX_RANGE_P(0),
PG_GETARG_PREFIX_RANGE_P(1),
FALSE) );
}
PG_FUNCTION_INFO_V1(prefix_range_le);
Datum
prefix_range_le(PG_FUNCTION_ARGS)
{
PG_RETURN_BOOL( pr_lt(PG_GETARG_PREFIX_RANGE_P(0),
PG_GETARG_PREFIX_RANGE_P(1),
TRUE) );
}
PG_FUNCTION_INFO_V1(prefix_range_gt);
Datum
prefix_range_gt(PG_FUNCTION_ARGS)
{
PG_RETURN_BOOL( pr_gt(PG_GETARG_PREFIX_RANGE_P(0),
PG_GETARG_PREFIX_RANGE_P(1),
FALSE) );
}
PG_FUNCTION_INFO_V1(prefix_range_ge);
Datum
prefix_range_ge(PG_FUNCTION_ARGS)
{
PG_RETURN_BOOL( pr_gt(PG_GETARG_PREFIX_RANGE_P(0),
PG_GETARG_PREFIX_RANGE_P(1),
TRUE) );
}
PG_FUNCTION_INFO_V1(prefix_range_cmp);
Datum
prefix_range_cmp(PG_FUNCTION_ARGS)
{
prefix_range *a = PG_GETARG_PREFIX_RANGE_P(0);
prefix_range *b = PG_GETARG_PREFIX_RANGE_P(1);
PG_RETURN_INT32(pr_cmp(a, b));
}
PG_FUNCTION_INFO_V1(prefix_range_overlaps);
Datum
prefix_range_overlaps(PG_FUNCTION_ARGS)
{
PG_RETURN_BOOL( pr_overlaps(PG_GETARG_PREFIX_RANGE_P(0),
PG_GETARG_PREFIX_RANGE_P(1)) );
}
PG_FUNCTION_INFO_V1(prefix_range_contains);
Datum
prefix_range_contains(PG_FUNCTION_ARGS)
{
PG_RETURN_BOOL( pr_contains(PG_GETARG_PREFIX_RANGE_P(0),
PG_GETARG_PREFIX_RANGE_P(1),
TRUE ));
}
PG_FUNCTION_INFO_V1(prefix_range_contains_strict);
Datum
prefix_range_contains_strict(PG_FUNCTION_ARGS)
{
PG_RETURN_BOOL( pr_contains(PG_GETARG_PREFIX_RANGE_P(0),
PG_GETARG_PREFIX_RANGE_P(1),
FALSE ));
}
PG_FUNCTION_INFO_V1(prefix_range_contained_by);
Datum
prefix_range_contained_by(PG_FUNCTION_ARGS)
{
PG_RETURN_BOOL( pr_contains(PG_GETARG_PREFIX_RANGE_P(1),
PG_GETARG_PREFIX_RANGE_P(0),
TRUE ));
}
PG_FUNCTION_INFO_V1(prefix_range_contained_by_strict);
Datum
prefix_range_contained_by_strict(PG_FUNCTION_ARGS)
{
PG_RETURN_BOOL( pr_contains(PG_GETARG_PREFIX_RANGE_P(1),
PG_GETARG_PREFIX_RANGE_P(0),
FALSE ));
}
PG_FUNCTION_INFO_V1(prefix_range_union);
Datum
prefix_range_union(PG_FUNCTION_ARGS)
{
PG_RETURN_PREFIX_RANGE_P( pr_union(PG_GETARG_PREFIX_RANGE_P(0),
PG_GETARG_PREFIX_RANGE_P(1)) );
}
PG_FUNCTION_INFO_V1(prefix_range_inter);
Datum
prefix_range_inter(PG_FUNCTION_ARGS)
{
PG_RETURN_PREFIX_RANGE_P( pr_inter(PG_GETARG_PREFIX_RANGE_P(0),
PG_GETARG_PREFIX_RANGE_P(1)) );
}
/**
* GiST support methods
*
* pr_penalty allows SQL level penalty code testing.
*/
Datum gpr_consistent(PG_FUNCTION_ARGS);
Datum gpr_compress(PG_FUNCTION_ARGS);
Datum gpr_decompress(PG_FUNCTION_ARGS);
Datum gpr_penalty(PG_FUNCTION_ARGS);
Datum gpr_picksplit(PG_FUNCTION_ARGS);
Datum gpr_picksplit_presort(PG_FUNCTION_ARGS);
Datum gpr_picksplit_jordan(PG_FUNCTION_ARGS);
Datum gpr_union(PG_FUNCTION_ARGS);
Datum gpr_same(PG_FUNCTION_ARGS);
Datum pr_penalty(PG_FUNCTION_ARGS);
/*
* Internal implementation of consistent
*
OPERATOR 1 @>,
OPERATOR 2 <@,
OPERATOR 3 =,
OPERATOR 4 &&,
*/
static inline
bool pr_consistent(StrategyNumber strategy,
prefix_range *key, prefix_range *query, bool is_leaf) {
switch (strategy) {
case 1:
return pr_contains(key, query, true);
case 2:
return pr_contains(query, key, true);
case 3:
if( is_leaf ) {
#ifdef DEBUG_CONSISTENT
elog(NOTICE, "gpr_consistent: %s %c= %s",
DatumGetCString(DirectFunctionCall1(prefix_range_out,PrefixRangeGetDatum(key))),
pr_eq(key, query) ? '=' : '!',
DatumGetCString(DirectFunctionCall1(prefix_range_out,PrefixRangeGetDatum(query))));
#endif
return pr_eq(key, query);
}
else {
return pr_contains(key, query, true);
}
case 4:
return pr_overlaps(key, query);
default:
return false;
}
}
/*
* The consistent function signature has changed in 8.4 to include RECHECK
* handling, but the signature declared in the OPERATOR CLASS is not
* considered at all.
*
* Still the function is called by mean of the fmgr, so we know whether
* we're called with pre-8.4 conventions or not by checking PG_NARGS().
*
* In all cases, we currently ignore the oid parameter (subtype).
*/
PG_FUNCTION_INFO_V1(gpr_consistent);
Datum
gpr_consistent(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
prefix_range *query = PG_GETARG_PREFIX_RANGE_P(1);
StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
prefix_range *key = DatumGetPrefixRange(entry->key);
bool *recheck;
Assert( PG_NARGS() == 4 || PG_NARGS() == 5);
if( PG_NARGS() == 5 ) {
/*
* New API in 8.4:
* consistent(internal, data_type, smallint, oid, internal)
*
* We don't ever want to recheck: index ain't lossy, we store
* prefix_range keys on the leaves.
*/
recheck = (bool *) PG_GETARG_POINTER(4);
*recheck = false;
}
PG_RETURN_BOOL( pr_consistent(strategy, key, query, GIST_LEAF(entry)) );
}
/*
* GiST Compress and Decompress methods for prefix_range
* do not do anything.
*/
PG_FUNCTION_INFO_V1(gpr_compress);
Datum
gpr_compress(PG_FUNCTION_ARGS)
{
PG_RETURN_POINTER(PG_GETARG_POINTER(0));
}
PG_FUNCTION_INFO_V1(gpr_decompress);
Datum
gpr_decompress(PG_FUNCTION_ARGS)
{
PG_RETURN_POINTER(PG_GETARG_POINTER(0));
}
static
float __pr_penalty(prefix_range *orig, prefix_range *new)
{
float penalty;
char *gp;
int nlen, olen, gplen, dist = 0;
char tmp;
#ifdef DEBUG_PENALTY
if( orig->prefix[0] != 0 ) {
/**
* The prefix main test case deals with phone number data, hence
* containing only numbers...
*/
if( orig->prefix[0] < '0' || orig->prefix[0] > '9' )
elog(NOTICE, "__pr_penalty(%s, %s) orig->first=%d orig->last=%d ",
DatumGetCString(DirectFunctionCall1(prefix_range_out,PrefixRangeGetDatum(orig))),
DatumGetCString(DirectFunctionCall1(prefix_range_out,PrefixRangeGetDatum(new))),
orig->first, orig->last);
Assert(orig->prefix[0] >= '0' && orig->prefix[0] <= '9');
}
#endif
olen = strlen(orig->prefix);
nlen = strlen(new->prefix);
gp = __greater_prefix(orig->prefix, new->prefix, olen, nlen);
gplen = strlen(gp);
dist = 1;
if( 0 == olen && 0 == nlen ) {
if( orig->last >= new->first )
dist = 0;
else
dist = new->first - orig->last;
}
else if( 0 == olen ) {
/**
* penalty('[a-b]', 'xyz');
*/
if( orig->first != 0 ) {
tmp = new->prefix[0];
if( orig->first <= tmp && tmp <= orig->last ) {
gplen = 1;
dist = 1 + (int)tmp - (int)orig->first;
if( (1 + (int)orig->last - (int)tmp) < dist )
dist = 1 + (int)orig->last - (int)tmp;
}
else
dist = (orig->first > tmp ? orig->first - tmp : tmp - orig->last );
}
}
else if( 0 == nlen ) {
/**
* penalty('abc', '[x-y]');
*/
if( new->first != 0 ) {
tmp = orig->prefix[0];
if( new->first <= tmp && tmp <= new->last ) {
gplen = 1;
dist = 1 + (int)tmp - (int)new->first;
if( (1 + (int)new->last - (int)tmp) < dist )
dist = 1 + (int)new->last - (int)tmp;
}
else
dist = (new->first > tmp ? new->first - tmp : tmp - new->last );
}
}
else {
/**
* General case
*/
if( gplen > 0 ) {
if( olen > gplen && nlen == gplen && new->first != 0 ) {
/**
* gpr_penalty('abc[f-l]', 'ab[x-y]')
*/
if( new->first <= orig->prefix[gplen]
&& orig->prefix[gplen] <= new->last ) {
dist = 1 + (int)orig->prefix[gplen] - (int)new->first;
if( (1 + (int)new->last - (int)orig->prefix[gplen]) < dist )
dist = 1 + (int)new->last - (int)orig->prefix[gplen];
gplen += 1;
}
else {
dist += 1;
}
}
else if( nlen > gplen && olen == gplen && orig->first != 0 ) {
/**
* gpr_penalty('ab[f-l]', 'abc[x-y]')
*/
if( orig->first <= new->prefix[gplen]
&& new->prefix[gplen] <= orig->last ) {
dist = 1 + (int)new->prefix[gplen] - (int)orig->first;
if( (1 + (int)orig->last - (int)new->prefix[gplen]) < dist )
dist = 1 + (int)orig->last - (int)new->prefix[gplen];
gplen += 1;
}
else {
dist += 1;
}
}
}
/**
* penalty('abc[f-l]', 'xyz[g-m]'), nothing common
* dist = 1, gplen = 0, penalty = 1
*/
}
penalty = (((float)dist) / powf(256, gplen));
#ifdef DEBUG_PENALTY
elog(NOTICE, "__pr_penalty(%s, %s) == %d/(256^%d) == %g",
DatumGetCString(DirectFunctionCall1(prefix_range_out,PrefixRangeGetDatum(orig))),
DatumGetCString(DirectFunctionCall1(prefix_range_out,PrefixRangeGetDatum(new))),
dist, gplen, penalty);
#endif
return penalty;
}
PG_FUNCTION_INFO_V1(gpr_penalty);
Datum
gpr_penalty(PG_FUNCTION_ARGS)
{
GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0);
GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
float *penalty = (float *) PG_GETARG_POINTER(2);
prefix_range *orig = DatumGetPrefixRange(origentry->key);
prefix_range *new = DatumGetPrefixRange(newentry->key);
*penalty = __pr_penalty(orig, new);
PG_RETURN_POINTER(penalty);
}
PG_FUNCTION_INFO_V1(pr_penalty);
Datum
pr_penalty(PG_FUNCTION_ARGS)
{
float penalty = __pr_penalty(PG_GETARG_PREFIX_RANGE_P(0),
PG_GETARG_PREFIX_RANGE_P(1));
PG_RETURN_FLOAT4(penalty);
}
/*
* Don't bother compiling this for 8.1, where pg_qsort ain't available.
*
* That's an experimental feature anyway, only used in the
* gist_prefix_range_jordan_ops opclass, which is not talked about in the
* user documentation of the module.
*/
#if PG_VERSION_NUM > 801
static int gpr_cmp(const void *a, const void *b) {
GISTENTRY **e1 = (GISTENTRY **)a;
GISTENTRY **e2 = (GISTENTRY **)b;
prefix_range *k1 = DatumGetPrefixRange((*e1)->key);
prefix_range *k2 = DatumGetPrefixRange((*e2)->key);
return pr_cmp(k1, k2);
}
#endif
/**
* Median idea from Jordan:
*
* sort the entries and choose a cut point near the median, being
* careful not to cut a group sharing a common prefix when sensible.
*/
PG_FUNCTION_INFO_V1(gpr_picksplit_jordan);
Datum
gpr_picksplit_jordan(PG_FUNCTION_ARGS)
{
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
OffsetNumber maxoff = entryvec->n - 1;
GISTENTRY *ent = entryvec->vector;
GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
int i, nbytes;
OffsetNumber *left, *right;
prefix_range *tmp_union;
prefix_range *unionL;
prefix_range *unionR;
GISTENTRY **raw_entryvec;
int cut, cut_tolerance, lower_dist, upper_dist;
maxoff = entryvec->n - 1;
nbytes = (maxoff + 1) * sizeof(OffsetNumber);
v->spl_left = (OffsetNumber *) palloc(nbytes);
left = v->spl_left;
v->spl_nleft = 0;
v->spl_right = (OffsetNumber *) palloc(nbytes);
right = v->spl_right;
v->spl_nright = 0;
unionL = NULL;
unionR = NULL;
/* Initialize the raw entry vector. */
raw_entryvec = (GISTENTRY **) malloc(entryvec->n * sizeof(void *));
for (i=FirstOffsetNumber; i <= maxoff; i=OffsetNumberNext(i))
raw_entryvec[i] = &(entryvec->vector[i]);
/* Sort the raw entry vector.
*
* It seems that pg_qsort isn't available in 8.1, but this code is
* experimental so we simply don't presort.
*/
#if PG_VERSION_NUM > 801
pg_qsort(&raw_entryvec[1], maxoff, sizeof(void *), &gpr_cmp);
#endif
/*
* Find the distance between the middle of the raw entry vector and the
* lower-index of the first group.
*/
cut = maxoff / 2;
cut_tolerance = cut / 2;
for (i=cut - 1; i > FirstOffsetNumber; i=OffsetNumberPrev(i)) {
tmp_union = pr_union(DatumGetPrefixRange(ent[i].key),
DatumGetPrefixRange(ent[i+1].key));
if( strlen(tmp_union->prefix) == 0 )
break;
}
lower_dist = cut - i;
/*
* Find the distance between the middle of the raw entry vector and the
* upper-index of the first group.
*/
for (i=1 + cut; i < maxoff; i=OffsetNumberNext(i)) {
tmp_union = pr_union(DatumGetPrefixRange(ent[i].key),
DatumGetPrefixRange(ent[i-1].key));
if( strlen(tmp_union->prefix) == 0 )
break;
}
upper_dist = i - cut;
/*
* Choose the cut based on whichever falls within the cut tolerance and
* is closer to the midpoint. Choose one at random it there is a tie.
*
* If neither are within the tolerance, use the midpoint as the default.
*/
if (lower_dist <= cut_tolerance || upper_dist <= cut_tolerance) {
if (lower_dist < upper_dist)
cut -= lower_dist;
else if (upper_dist < lower_dist)
cut += upper_dist;
else
cut = (random() % 2) ? (cut - lower_dist) : (cut + upper_dist);
}
for (i=FirstOffsetNumber; i <= maxoff; i=OffsetNumberNext(i)) {
int real_index = raw_entryvec[i] - entryvec->vector;
// datum_alpha = VECGETKEY(entryvec, real_index);
tmp_union = DatumGetPrefixRange(entryvec->vector[real_index].key);
Assert(tmp_union != NULL);
/* Put everything below the cut in the left node. */
if (i < cut) {
if( unionL == NULL )
unionL = tmp_union;
else
unionL = pr_union(unionL, tmp_union);
*left = real_index;
++left;
++(v->spl_nleft);
}
/* And put everything else in the right node. */
else {
if( unionR == NULL )
unionR = tmp_union;
else
unionR = pr_union(unionR, tmp_union);
*right = real_index;
++right;
++(v->spl_nright);
}
}
*left = *right = FirstOffsetNumber; /* sentinel value, see dosplit() */
v->spl_ldatum = PrefixRangeGetDatum(unionL);
v->spl_rdatum = PrefixRangeGetDatum(unionR);
PG_RETURN_POINTER(v);
}
/**
* prefix picksplit first pass step: presort the GistEntryVector
* vector by positionning the elements sharing the non-empty prefix
* which is the more frequent in the distribution at the beginning of
* the vector.
*
* This will have the effect that the picksplit() implementation will
* do a better job, per preliminary tests on not-so random data.
*/
struct gpr_unions
{
prefix_range *prefix; /* a shared prefix */
int n; /* how many entries begins with this prefix */
};
static
OffsetNumber *pr_presort(GistEntryVector *list)
{
GISTENTRY *ent = list->vector;
OffsetNumber maxoff = list->n - 1;
prefix_range *init = DatumGetPrefixRange(ent[FirstOffsetNumber].key);
prefix_range *cur, *gp;
int gplen, maxlen;
bool found;
struct gpr_unions max;
struct gpr_unions *unions = (struct gpr_unions *)
palloc((maxoff+1) * sizeof(struct gpr_unions));
OffsetNumber unions_it = FirstOffsetNumber; /* unions iterator */
OffsetNumber i, u;
int result_it, result_it_maxes = FirstOffsetNumber;
OffsetNumber *result = (OffsetNumber *) malloc(list->n * sizeof(OffsetNumber));
#ifdef DEBUG_PRESORT_MAX
#define DEBUG_COUNT
int debug_count;
#endif
#ifdef DEBUG_PRESORT_UNIONS
#ifndef DEBUG_COUNT
int debug_count;
#endif
#endif
unions[unions_it].prefix = init;
unions[unions_it].n = 1;
unions_it = OffsetNumberNext(unions_it);
max.prefix = init;
max.n = 1;
#ifdef DEBUG_PRESORT_MAX
elog(NOTICE, " prefix_presort(): init=%s max.prefix=%s max.n=%d",
DatumGetCString(DirectFunctionCall1(prefix_range_out,PrefixRangeGetDatum(init))),
DatumGetCString(DirectFunctionCall1(prefix_range_out,PrefixRangeGetDatum(max.prefix))),
max.n);
#endif
/**
* Prepare a list of prefixes and how many time they are found.
*/
for(i = OffsetNumberNext(FirstOffsetNumber); i <= maxoff; i = OffsetNumberNext(i)) {
found = false;
cur = DatumGetPrefixRange(ent[i].key);
for(u = FirstOffsetNumber; u < unions_it; u = OffsetNumberNext(u)) {
if( unions[u].n < 1 )
continue;
gp = pr_union(cur, unions[u].prefix);
gplen = strlen(gp->prefix);
#ifdef DEBUG_PRESORT_GP
if( gplen > 0 ) {
elog(NOTICE, " prefix_presort(): gplen=%2d, %s @> %s = %s",
gplen,
DatumGetCString(DirectFunctionCall1(prefix_range_out, PrefixRangeGetDatum(gp))),
DatumGetCString(DirectFunctionCall1(prefix_range_out, PrefixRangeGetDatum(cur))),
(pr_contains(gp, cur, true) ? "t" : "f"));
}
#endif
if( gplen > 0 ) {
/**
* Current list entry share a common prefix with some previous
* analyzed list entry, update the prefix and number.
*/
found = true;
unions[u].n += 1;
unions[u].prefix = gp;
/**
* We just updated unions, we may have to update max too.
*/
if( unions[u].n > max.n ) {
max.prefix = unions[u].prefix;
max.n = unions[u].n;
#ifdef DEBUG_PRESORT_MAX
elog(NOTICE, " prefix_presort(): max.prefix=%s max.n=%d",
DatumGetCString(DirectFunctionCall1(prefix_range_out,PrefixRangeGetDatum(max.prefix))),
max.n);
#endif
}
/**
* break from the unions loop, we're done with it for this
* element.
*/
break;
}
}
/**
* We're done with the unions loop, if we didn't find a common
* prefix we have to add the current list element to unions
*/
if( !found ) {
unions[unions_it].prefix = cur;
unions[unions_it].n = 1;
unions_it = OffsetNumberNext(unions_it);
}
}
#ifdef DEBUG_PRESORT_UNIONS
debug_count = 0;
for(u = FirstOffsetNumber; u < unions_it; u = OffsetNumberNext(u)) {
debug_count += unions[u].n;
elog(NOTICE, " prefix_presort(): unions[%s] = %d",
DatumGetCString(DirectFunctionCall1(prefix_range_out,PrefixRangeGetDatum(unions[u].prefix))),
unions[u].n);
}
elog(NOTICE, " prefix_presort(): total: %d", debug_count);
#endif
#ifdef DEBUG_PRESORT_MAX
debug_count = 0;
for(i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) {
cur = DatumGetPrefixRange(ent[i].key);
if( pr_contains(max.prefix, cur, true) )
debug_count++;
}
elog(NOTICE, " prefix_presort(): max.prefix %s @> %d entries",
DatumGetCString(DirectFunctionCall1(prefix_range_out,PrefixRangeGetDatum(max.prefix))),
debug_count);
#endif
/**
* We now have a list of common non-empty prefixes found on the list
* (unions) and kept the max entry while computing this weighted
* unions list.
*
* Simple case : a common non-empty prefix is shared by all list
* entries.
*/
if( max.n >= list->n ) {
/**
* A common non-empty prefix is shared by all list entries
*/
for(i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) {
result[i] = i;
}
return result;
}
/**
* If we arrive here, we now have to make up the result by copying
* max matching elements first, then the others list entries in
* their original order. To do this, we reserve the first result
* max.n places to the max.prefix matching elements (see result_it
* and result_it_maxes).
*
* result_it_maxes will go from FirstOffsetNumber to max.n included,
* and result_it will iterate through the end of the list, that is
* from max.n - FirstOffsetNumber + 1 to maxoff.
*
* [a, b] contains b - a + 1 elements, hence
* [FirstOffsetNumber, max.n] contains max.n - FirstOffsetNumber + 1
* elements, whatever FirstOffsetNumber value.
*/
result_it_maxes = FirstOffsetNumber;
result_it = OffsetNumberNext(max.n - FirstOffsetNumber + 1);
#ifdef DEBUG_PRESORT_MAX
elog(NOTICE, " prefix_presort(): max.prefix=%s max.n=%d result_it=%d",
DatumGetCString(DirectFunctionCall1(prefix_range_out,PrefixRangeGetDatum(max.prefix))),
max.n, result_it);
#endif
maxlen = strlen((max.prefix)->prefix);
for(i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) {
cur = DatumGetPrefixRange(ent[i].key);
#ifdef DEBUG_PRESORT_RESULT
elog(NOTICE, " prefix_presort(): ent[%4d] = %s <@ %s = %s => result[%4d]",
i,
DatumGetCString(DirectFunctionCall1(prefix_range_out,PrefixRangeGetDatum(cur))),
DatumGetCString(DirectFunctionCall1(prefix_range_out,PrefixRangeGetDatum(max.prefix))),
(pr_contains(max.prefix, cur, true) ? "t" : "f"),
(pr_contains(max.prefix, cur, true) ? result_it_maxes : result_it));
#endif
if( pr_contains(max.prefix, cur, true) ) {
/**
* cur has to go in first part of the list, as max.prefix is a
* prefix of it.
*/
Assert(result_it_maxes <= max.n);
result[result_it_maxes] = i;
result_it_maxes = OffsetNumberNext(result_it_maxes);
}
else {
/**
* cur has to go at next second part position.
*/
Assert(result_it <= maxoff);
result[result_it] = i;
result_it = OffsetNumberNext(result_it);
}
}
#ifdef DEBUG_PRESORT_RESULT
elog(NOTICE, " prefix_presort(): result_it_maxes=%4d result_it=%4d list->n=%d maxoff=%d",
result_it_maxes, result_it, list->n, maxoff);
#endif
return result;
}
/**
* Internal picksplit function, with a presort option.
*
* This form allows for defining two different opclasses for
* comparative testing purposes.
*/
static
Datum pr_picksplit(GistEntryVector *entryvec, GIST_SPLITVEC *v, bool presort) {
OffsetNumber maxoff = entryvec->n - 1;
GISTENTRY *ent = entryvec->vector;
OffsetNumber *sort = NULL;
int nbytes;
OffsetNumber offl, offr;
OffsetNumber *listL;
OffsetNumber *listR;
prefix_range *curl, *curr, *tmp_union;
prefix_range *unionL;
prefix_range *unionR;
/**
* Keeping track of penalties to insert into ListL or ListR, for
* both the leftmost and the rightmost element of the remaining
* list.
*/
float pll, plr, prl, prr;
#ifdef DEBUG_PICKSPLIT
OffsetNumber i;
#endif
if( presort ) {
sort = pr_presort(entryvec);
#ifdef DEBUG_PICKSPLIT
for(i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i) ) {
elog(NOTICE, "pr_picksplit: sort[%3d] = %2d", i, sort[i]);
}
#endif
}
nbytes = (maxoff + 1) * sizeof(OffsetNumber);
listL = (OffsetNumber *) palloc(nbytes);
listR = (OffsetNumber *) palloc(nbytes);
v->spl_left = listL;
v->spl_right = listR;
v->spl_nleft = v->spl_nright = 0;
offl = FirstOffsetNumber;
offr = maxoff;
unionL = DatumGetPrefixRange(ent[offl].key);
unionR = DatumGetPrefixRange(ent[offr].key);
v->spl_left[v->spl_nleft++] = offl;
v->spl_right[v->spl_nright++] = offr;
v->spl_left = listL;
v->spl_right = listR;
offl = OffsetNumberNext(offl);
offr = OffsetNumberPrev(offr);
while( offl < offr ) {
if( presort ) {
curl = DatumGetPrefixRange(ent[sort[offl]].key);
curr = DatumGetPrefixRange(ent[sort[offr]].key);
}
else {
curl = DatumGetPrefixRange(ent[offl].key);
curr = DatumGetPrefixRange(ent[offr].key);
}
#ifdef DEBUG_PICKSPLIT
elog(NOTICE, "gpr_picksplit: ent[%3d] = '%s' \tent[%3d] = '%s'",
offl,
DatumGetCString(DirectFunctionCall1(prefix_range_out, PrefixRangeGetDatum(curl))),
offr,
DatumGetCString(DirectFunctionCall1(prefix_range_out, PrefixRangeGetDatum(curr))));
#endif
Assert(curl != NULL && curr != NULL);
pll = __pr_penalty(unionL, curl);
plr = __pr_penalty(unionR, curl);
prl = __pr_penalty(unionL, curr);
prr = __pr_penalty(unionR, curr);
if( pll <= plr && prl >= prr ) {
/**
* curl should go to left and curr to right, unless they share
* a non-empty common prefix, in which case we place both curr
* and curl on the same side. Arbitrarily the left one.
*/
if( pll == plr && prl == prr ) {
tmp_union = pr_union(curl, curr);
if( strlen(tmp_union->prefix) > 0 ) {
unionL = pr_union(unionL, tmp_union);
v->spl_left[v->spl_nleft++] = offl;
v->spl_left[v->spl_nleft++] = offr;
offl = OffsetNumberNext(offl);
offr = OffsetNumberPrev(offr);
continue;
}
}
/**
* here pll <= plr and prl >= prr and (pll != plr || prl != prr)
*/
unionL = pr_union(unionL, curl);
unionR = pr_union(unionR, curr);
v->spl_left[v->spl_nleft++] = offl;
v->spl_right[v->spl_nright++] = offr;
offl = OffsetNumberNext(offl);
offr = OffsetNumberPrev(offr);
}
else if( pll > plr && prl >= prr ) {
/**
* Current rightmost entry is added to listL
*/
unionR = pr_union(unionR, curr);
v->spl_right[v->spl_nright++] = offr;
offr = OffsetNumberPrev(offr);
}
else if( pll <= plr && prl < prr ) {
/**
* Current leftmost entry is added to listL
*/
unionL = pr_union(unionL, curl);
v->spl_left[v->spl_nleft++] = offl;
offl = OffsetNumberNext(offl);
}
else if( (pll - plr) < (prr - prl) ) {
/**
* All entries still in the list go into listL
*/
for(; offl <= offr; offl = OffsetNumberNext(offl)) {
curl = DatumGetPrefixRange(ent[offl].key);
unionL = pr_union(unionL, curl);
v->spl_left[v->spl_nleft++] = offl;
}
}
else {
/**
* All entries still in the list go into listR
*/
for(; offr >= offl; offr = OffsetNumberPrev(offr)) {
curr = DatumGetPrefixRange(ent[offr].key);
unionR = pr_union(unionR, curr);
v->spl_right[v->spl_nright++] = offr;
}
}
}
/**
* The for loop continues while offl < offr. If maxoff is odd, it
* could be that there's a last value to process. Here we choose
* where to add it.
*/
if( offl == offr ) {
curl = DatumGetPrefixRange(ent[offl].key);
pll = __pr_penalty(unionL, curl);
plr = __pr_penalty(unionR, curl);
if( pll < plr || (pll == plr && v->spl_nleft < v->spl_nright) ) {
curl = DatumGetPrefixRange(ent[offl].key);
unionL = pr_union(unionL, curl);
v->spl_left[v->spl_nleft++] = offl;
}
else {
curl = DatumGetPrefixRange(ent[offl].key);
unionR = pr_union(unionR, curl);
v->spl_right[v->spl_nright++] = offl;
}
}
v->spl_ldatum = PrefixRangeGetDatum(unionL);
v->spl_rdatum = PrefixRangeGetDatum(unionR);
/**
* All read entries (maxoff) should have make it to the
* GIST_SPLITVEC return value.
*/
Assert(maxoff = v->spl_nleft+v->spl_nright);
#ifdef DEBUG_PICKSPLIT
elog(NOTICE, "gpr_picksplit(): entryvec->n=%4d maxoff=%4d l=%4d r=%4d l+r=%4d unionL='%s' unionR='%s'",
entryvec->n, maxoff, v->spl_nleft, v->spl_nright, v->spl_nleft+v->spl_nright,
DatumGetCString(DirectFunctionCall1(prefix_range_out, v->spl_ldatum)),
DatumGetCString(DirectFunctionCall1(prefix_range_out, v->spl_rdatum)));
#endif
PG_RETURN_POINTER(v);
}
PG_FUNCTION_INFO_V1(gpr_picksplit);
Datum
gpr_picksplit(PG_FUNCTION_ARGS)
{
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
PG_RETURN_POINTER(pr_picksplit(entryvec, v, false));
}
PG_FUNCTION_INFO_V1(gpr_picksplit_presort);
Datum
gpr_picksplit_presort(PG_FUNCTION_ARGS)
{
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
PG_RETURN_POINTER(pr_picksplit(entryvec, v, true));
}
PG_FUNCTION_INFO_V1(gpr_union);
Datum
gpr_union(PG_FUNCTION_ARGS)
{
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
GISTENTRY *ent = entryvec->vector;
prefix_range *out, *tmp, *old;
int numranges, i = 0;
numranges = entryvec->n;
tmp = DatumGetPrefixRange(ent[0].key);
out = tmp;
if( numranges == 1 ) {
out = build_pr(tmp->prefix, tmp->first, tmp->last);
PG_RETURN_PREFIX_RANGE_P(out);
}
for (i = 1; i < numranges; i++) {
old = out;
tmp = DatumGetPrefixRange(ent[i].key);
out = pr_union(out, tmp);
#ifdef DEBUG_UNION
elog(NOTICE, "gpr_union: %s | %s = %s",
DatumGetCString(DirectFunctionCall1(prefix_range_out, PrefixRangeGetDatum(old))),
DatumGetCString(DirectFunctionCall1(prefix_range_out, PrefixRangeGetDatum(tmp))),
DatumGetCString(DirectFunctionCall1(prefix_range_out, PrefixRangeGetDatum(out))));
#endif
}
/*
#ifdef DEBUG_UNION
elog(NOTICE, "gpr_union: %s",
DatumGetCString(DirectFunctionCall1(prefix_range_out,
PrefixRangeGetDatum(out))));
#endif
*/
PG_RETURN_PREFIX_RANGE_P(out);
}
PG_FUNCTION_INFO_V1(gpr_same);
Datum
gpr_same(PG_FUNCTION_ARGS)
{
prefix_range *v1 = PG_GETARG_PREFIX_RANGE_P(0);
prefix_range *v2 = PG_GETARG_PREFIX_RANGE_P(1);
bool *result = (bool *) PG_GETARG_POINTER(2);
*result = pr_eq(v1, v2);
PG_RETURN_POINTER( result );
}
Jump to Line
Something went wrong with that request. Please try again.