Skip to content

Commit

Permalink
Add another hamming function. The former are for varbit parameters. The
Browse files Browse the repository at this point in the history
latter are for text parameters (like the other functions). Also, add another
operator ~@~.
  • Loading branch information
Euler Taveira de Oliveira committed Aug 4, 2011
1 parent 77917d6 commit c18a43d
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 8 deletions.
96 changes: 96 additions & 0 deletions hamming.c
Expand Up @@ -129,3 +129,99 @@ Datum hamming_op(PG_FUNCTION_ARGS)

PG_RETURN_BOOL(res >= pgs_hamming_threshold);
}

PG_FUNCTION_INFO_V1(hamming_text);

/*
 * hamming_text
 *
 * Hamming measure between two bit strings that are supplied as text.
 *
 * Arguments (both text):
 *   strings made only of the characters '0' and '1', of equal length and
 *   no longer than PGS_MAX_STR_LEN bytes.
 *
 * Returns (float4):
 *   - 1.0 when both strings are empty;
 *   - 1 - (mismatches / length) when pgs_hamming_is_normalized is set;
 *   - the raw number of mismatching positions otherwise.
 *
 * Errors (all ERRCODE_INVALID_PARAMETER_VALUE):
 *   - an argument is longer than PGS_MAX_STR_LEN;
 *   - the arguments differ in length;
 *   - an argument contains a character other than '0' or '1'.
 */
Datum
hamming_text(PG_FUNCTION_ARGS)
{
	char	   *a, *b;
	int			alen, blen;
	char	   *pa, *pb;
	int			maxlen;
	float4		res = 0.0;

	/* convert both text arguments to NUL-terminated C strings */
	a = DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(PG_GETARG_TEXT_P(0))));
	b = DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(PG_GETARG_TEXT_P(1))));

	alen = strlen(a);
	blen = strlen(b);

	if (alen > PGS_MAX_STR_LEN || blen > PGS_MAX_STR_LEN)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("argument exceeds the maximum length of %d bytes",
						PGS_MAX_STR_LEN)));

	elog(DEBUG1, "alen: %d; blen: %d", alen, blen);

	/* positions are compared pairwise, so the lengths must match */
	if (alen != blen)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("bit strings must have the same length")));

	elog(DEBUG1, "a: %s ; b: %s", a, b);

	/* alen and blen have the same length */
	maxlen = alen;

	/*
	 * Walk both strings in lockstep; reaching the end of a also means
	 * reaching the end of b because the lengths are equal.
	 */
	pa = a;
	pb = b;
	while (*pa != '\0')
	{
		elog(DEBUG4, "a: %c ; b: %c", *pa, *pb);

		/* are these bit strings? */
		if ((*pa != '0' && *pa != '1') || (*pb != '0' && *pb != '1'))
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("arguments must contain only 0 or 1")));

		/* count the positions where the two strings disagree */
		if (*pa++ != *pb++)
			res++;
	}

	elog(DEBUG1, "is normalized: %d", pgs_hamming_is_normalized);
	elog(DEBUG1, "maximum length: %d", maxlen);

	/*
	 * a and b are already C strings, so they are passed to elog() directly
	 * instead of going through a Datum accessor.
	 */
	elog(DEBUG1, "hammingdistance(%s, %s) = %.3f", a, b, res);

	/*
	 * Both strings are empty (the lengths are equal); return one.
	 * NOTE(review): this also returns 1.0 when pgs_hamming_is_normalized is
	 * false, where the mismatch count would be 0 -- confirm this is intended.
	 */
	if (maxlen == 0)
	{
		PG_RETURN_FLOAT4(1.0);
	}
	else if (pgs_hamming_is_normalized)
	{
		/* turn the mismatch count into a similarity in [0, 1] */
		res = 1.0 - (res / maxlen);
		elog(DEBUG1, "hamming(%s, %s) = %.3f", a, b, res);
		PG_RETURN_FLOAT4(res);
	}
	else
	{
		PG_RETURN_FLOAT4(res);
	}
}

PG_FUNCTION_INFO_V1(hamming_text_op);

/*
 * hamming_text_op
 *
 * Boolean form of hamming_text: returns true when the normalized
 * hamming_text result for the two text arguments is greater than or equal
 * to pgs_hamming_threshold.
 *
 * Any error raised by hamming_text propagates to the caller;
 * pgs_hamming_is_normalized is restored to its previous value first.
 */
Datum hamming_text_op(PG_FUNCTION_ARGS)
{
	float4		res = 0.0;

	/*
	 * store *_is_normalized value temporarily 'cause
	 * threshold (we're comparing against) is normalized
	 */
	bool		tmp = pgs_hamming_is_normalized;

	pgs_hamming_is_normalized = true;

	/*
	 * hamming_text can ereport(ERROR), which jumps out of this function.
	 * Without the PG_CATCH block the flag would stay forced to true after
	 * such an error.
	 */
	PG_TRY();
	{
		res = DatumGetFloat4(DirectFunctionCall2(
								hamming_text,
								PG_GETARG_DATUM(0),
								PG_GETARG_DATUM(1)));
	}
	PG_CATCH();
	{
		/* put the previous value back before propagating the error */
		pgs_hamming_is_normalized = tmp;
		PG_RE_THROW();
	}
	PG_END_TRY();

	/* we're done; back to the previous value */
	pgs_hamming_is_normalized = tmp;

	PG_RETURN_BOOL(res >= pgs_hamming_threshold);
}
22 changes: 15 additions & 7 deletions pg_similarity.sql.in
Expand Up @@ -80,13 +80,21 @@ CREATE OR REPLACE FUNCTION hamming_op (varbit, varbit) RETURNS bool
AS 'MODULE_PATHNAME', 'hamming_op'
LANGUAGE C STABLE STRICT;

--CREATE OPERATOR ~@@ (
-- LEFTARG = text,
-- RIGHTARG = text,
-- PROCEDURE = hamming_op,
-- COMMUTATOR = '~@@',
-- JOIN = contjoinsel
--);
-- hamming_text(text, text): Hamming measure for bit strings passed as text.
-- The C implementation rejects arguments that differ in length or that
-- contain characters other than '0' and '1'.
-- NOTE(review): the C code reads pgs_hamming_is_normalized, so the result is
-- not determined by the arguments alone; confirm that IMMUTABLE is intended.
CREATE OR REPLACE FUNCTION hamming_text (text, text) RETURNS float4
AS 'MODULE_PATHNAME','hamming_text'
LANGUAGE C IMMUTABLE STRICT;

-- hamming_text_op(text, text): boolean form of hamming_text. The C
-- implementation returns true when the normalized hamming_text result is
-- greater than or equal to pgs_hamming_threshold.
CREATE OR REPLACE FUNCTION hamming_text_op (text, text) RETURNS bool
AS 'MODULE_PATHNAME', 'hamming_text_op'
LANGUAGE C STABLE STRICT;

-- ~@~ : Hamming operator for text operands, implemented by hamming_text_op.
-- It is declared as its own commutator.
CREATE OPERATOR ~@~ (
LEFTARG = text,
RIGHTARG = text,
PROCEDURE = hamming_text_op,
COMMUTATOR = '~@~',
JOIN = contjoinsel
);

-- Jaccard
CREATE OR REPLACE FUNCTION jaccard (text, text) RETURNS float4
Expand Down
2 changes: 2 additions & 0 deletions similarity.h
Expand Up @@ -214,6 +214,8 @@ extern Datum PGS_EXPORT euclidean(PG_FUNCTION_ARGS);
extern Datum PGS_EXPORT euclidean_op(PG_FUNCTION_ARGS);
/*
 * Hamming: hamming/hamming_op are registered for varbit arguments,
 * hamming_text/hamming_text_op for text arguments.
 */
extern Datum PGS_EXPORT hamming(PG_FUNCTION_ARGS);
extern Datum PGS_EXPORT hamming_op(PG_FUNCTION_ARGS);
extern Datum PGS_EXPORT hamming_text(PG_FUNCTION_ARGS);
extern Datum PGS_EXPORT hamming_text_op(PG_FUNCTION_ARGS);
extern Datum PGS_EXPORT jaccard(PG_FUNCTION_ARGS);
extern Datum PGS_EXPORT jaccard_op(PG_FUNCTION_ARGS);
extern Datum PGS_EXPORT jaro(PG_FUNCTION_ARGS);
Expand Down
4 changes: 3 additions & 1 deletion uninstall_pg_similarity.sql
Expand Up @@ -19,7 +19,9 @@ DROP OPERATOR ~!! (text, text);
DROP FUNCTION euclidean (text, text);
DROP FUNCTION euclidean_op (text, text);

--DROP OPERATOR ~@@ (text, text);
-- Hamming: the ~@~ operator is implemented by hamming_text_op, so the
-- operator is dropped before the functions.
DROP OPERATOR ~@~ (text, text);
DROP FUNCTION hamming_text (text, text);
DROP FUNCTION hamming_text_op (text, text);
DROP FUNCTION hamming (varbit, varbit);
DROP FUNCTION hamming_op (varbit, varbit);

Expand Down

0 comments on commit c18a43d

Please sign in to comment.