Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 21 additions & 21 deletions pg_diffix--0.0.1.sql
Original file line number Diff line number Diff line change
Expand Up @@ -39,29 +39,29 @@ $$;
* ----------------------------------------------------------------
*/

CREATE FUNCTION diffix.lcf_transfn(internal, variadic "any")
CREATE FUNCTION diffix.lcf_transfn(internal, variadic aids "any")
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE C STABLE;

CREATE FUNCTION diffix.lcf_finalfn(internal, variadic "any")
CREATE FUNCTION diffix.lcf_finalfn(internal, variadic aids "any")
RETURNS boolean
AS 'MODULE_PATHNAME'
LANGUAGE C STABLE;

CREATE FUNCTION diffix.lcf_explain_finalfn(internal, variadic "any")
CREATE FUNCTION diffix.lcf_explain_finalfn(internal, variadic aids "any")
RETURNS text
AS 'MODULE_PATHNAME'
LANGUAGE C STABLE;

CREATE AGGREGATE diffix.lcf(variadic "any") (
CREATE AGGREGATE diffix.lcf(variadic aids "any") (
sfunc = diffix.lcf_transfn,
stype = internal,
finalfunc = diffix.lcf_finalfn,
finalfunc_extra
);

CREATE AGGREGATE diffix.explain_lcf(variadic "any") (
CREATE AGGREGATE diffix.explain_lcf(variadic aids "any") (
sfunc = diffix.lcf_transfn,
stype = internal,
finalfunc = diffix.lcf_explain_finalfn,
Expand All @@ -73,29 +73,29 @@ CREATE AGGREGATE diffix.explain_lcf(variadic "any") (
* ----------------------------------------------------------------
*/

CREATE FUNCTION diffix.anon_count_distinct_transfn(internal, "any", variadic "any")
CREATE FUNCTION diffix.anon_count_distinct_transfn(internal, value "any", variadic aids "any")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we now have `value` here, I'd revert to `any` in the comments (both SQL and C). I think it's better because we have no typing in C-land, and that's the closest we can get to a signature. This will become noticeable when we get more UDFs and possibly overloads. Imagine we have a `max` function for both datetime and integer; would you rather write them as

max(value)
max(value)

or

max(integer)
max(datetime)

?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But if you put `any, aids...` there, it doesn't make much sense, and neither does `any, any...`. Documenting what the argument represents is more useful than the `any` type. Since those are caption comments, maybe we can drop the arguments - everything is properly documented in the `CREATE ...` statements anyway.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`aid` has a special meaning for us because we produce the aid adapters in code, so we know right away what to expect. `any, aids...` is perfectly fine.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, let it be. You mean like this, correct?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The latest commit looks nice. We experimented with making it a proper Postgres type, but without success, and ended up with our current design based on the AidSpec stuff.

RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE C STABLE;

CREATE FUNCTION diffix.anon_count_distinct_finalfn(internal, "any", variadic "any")
CREATE FUNCTION diffix.anon_count_distinct_finalfn(internal, value "any", variadic aids "any")
RETURNS int8
AS 'MODULE_PATHNAME'
LANGUAGE C STABLE;

CREATE FUNCTION diffix.anon_count_distinct_explain_finalfn(internal, "any", variadic "any")
CREATE FUNCTION diffix.anon_count_distinct_explain_finalfn(internal, value "any", variadic aids "any")
RETURNS text
AS 'MODULE_PATHNAME'
LANGUAGE C STABLE;

CREATE AGGREGATE diffix.anon_count_distinct("any", variadic "any") (
CREATE AGGREGATE diffix.anon_count_distinct(value "any", variadic aids "any") (
sfunc = diffix.anon_count_distinct_transfn,
stype = internal,
finalfunc = diffix.anon_count_distinct_finalfn,
finalfunc_extra
);

CREATE AGGREGATE diffix.explain_anon_count_distinct("any", variadic "any") (
CREATE AGGREGATE diffix.explain_anon_count_distinct(value "any", variadic aids "any") (
sfunc = diffix.anon_count_distinct_transfn,
stype = internal,
finalfunc = diffix.anon_count_distinct_explain_finalfn,
Expand All @@ -107,63 +107,63 @@ CREATE AGGREGATE diffix.explain_anon_count_distinct("any", variadic "any") (
* ----------------------------------------------------------------
*/

CREATE FUNCTION diffix.anon_count_transfn(internal, variadic "any")
CREATE FUNCTION diffix.anon_count_transfn(internal, variadic aids "any")
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE C STABLE;

CREATE FUNCTION diffix.anon_count_finalfn(internal, variadic "any")
CREATE FUNCTION diffix.anon_count_finalfn(internal, variadic aids "any")
RETURNS int8
AS 'MODULE_PATHNAME'
LANGUAGE C STABLE;

CREATE FUNCTION diffix.anon_count_explain_finalfn(internal, variadic "any")
CREATE FUNCTION diffix.anon_count_explain_finalfn(internal, variadic aids "any")
RETURNS text
AS 'MODULE_PATHNAME'
LANGUAGE C STABLE;

CREATE AGGREGATE diffix.anon_count(variadic "any") (
CREATE AGGREGATE diffix.anon_count(variadic aids "any") (
sfunc = diffix.anon_count_transfn,
stype = internal,
finalfunc = diffix.anon_count_finalfn,
finalfunc_extra
);

CREATE AGGREGATE diffix.explain_anon_count(variadic "any") (
CREATE AGGREGATE diffix.explain_anon_count(variadic aids "any") (
sfunc = diffix.anon_count_transfn,
stype = internal,
finalfunc = diffix.anon_count_explain_finalfn,
finalfunc_extra
);

/* ----------------------------------------------------------------
* anon_count(any, aids...)
* anon_count_any(any, aids...)
* ----------------------------------------------------------------
*/

CREATE FUNCTION diffix.anon_count_any_transfn(internal, "any", variadic "any")
CREATE FUNCTION diffix.anon_count_any_transfn(internal, value "any", variadic aids "any")
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE C STABLE;

CREATE FUNCTION diffix.anon_count_any_finalfn(internal, "any", variadic "any")
CREATE FUNCTION diffix.anon_count_any_finalfn(internal, value "any", variadic aids "any")
RETURNS int8
AS 'MODULE_PATHNAME'
LANGUAGE C STABLE;

CREATE FUNCTION diffix.anon_count_any_explain_finalfn(internal, "any", variadic "any")
CREATE FUNCTION diffix.anon_count_any_explain_finalfn(internal, value "any", variadic aids "any")
RETURNS text
AS 'MODULE_PATHNAME'
LANGUAGE C STABLE;

CREATE AGGREGATE diffix.anon_count_any("any", variadic "any") (
CREATE AGGREGATE diffix.anon_count_any(value "any", variadic aids "any") (
sfunc = diffix.anon_count_any_transfn,
stype = internal,
finalfunc = diffix.anon_count_any_finalfn,
finalfunc_extra
);

CREATE AGGREGATE diffix.explain_anon_count_any("any", variadic "any") (
CREATE AGGREGATE diffix.explain_anon_count_any(value "any", variadic aids "any") (
sfunc = diffix.anon_count_any_transfn,
stype = internal,
finalfunc = diffix.anon_count_any_explain_finalfn,
Expand Down
2 changes: 1 addition & 1 deletion pg_diffix/aggregation/count.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ extern CountResult aggregate_count_contributions(
typedef struct CountResultAccumulator
{
int64 max_flattening;
int64 max_flattened_count;
int64 max_flattened_count_with_max_flattening;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does this do?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is somewhat along the lines of `noise_with_max_sigma`. This field doesn't hold the overall maximum flattened count, but the largest flattened count among the results that have the maximum flattening. I thought I could trade the explanatory comment for a longer name.

double max_noise_sigma;
int64 noise_with_max_sigma;
} CountResultAccumulator;
Expand Down
10 changes: 5 additions & 5 deletions pg_diffix/query/oid_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@ typedef struct Oids
{
Oid count; /* count(*) */
Oid count_any; /* count(any) */
Oid lcf; /* lcf(aid) */
Oid anon_count_distinct; /* anon_count_distinct(aid) */
Oid anon_count; /* anon_count(aid) */
Oid anon_count_any; /* anon_count(aid, any) */
Oid generate_series; /* generate_series(aid, any) */
Oid lcf; /* lcf(aids...) */
Oid anon_count_distinct; /* anon_count_distinct(any, aids...) */
Oid anon_count; /* anon_count(aids...) */
Oid anon_count_any; /* anon_count_any(any, aids...) */
Oid generate_series; /* generate_series(start, stop) */
} Oids;

/*
Expand Down
40 changes: 21 additions & 19 deletions src/aggregation/count.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ const ContributionDescriptor count_descriptor = {

static const int COUNT_AIDS_OFFSET = 1;
static const int COUNT_ANY_AIDS_OFFSET = 2;
static const int VALUE_INDEX = 1;

PG_FUNCTION_INFO_V1(anon_count_transfn);
PG_FUNCTION_INFO_V1(anon_count_finalfn);
Expand All @@ -53,13 +54,13 @@ Datum anon_count_transfn(PG_FUNCTION_ARGS)

Assert(PG_NARGS() == list_length(trackers) + COUNT_AIDS_OFFSET);

ListCell *lc;
foreach (lc, trackers)
ListCell *cell;
foreach (cell, trackers)
{
int aid_index = foreach_current_index(lc) + COUNT_AIDS_OFFSET;
int aid_index = foreach_current_index(cell) + COUNT_AIDS_OFFSET;
if (!PG_ARGISNULL(aid_index))
{
ContributionTrackerState *tracker = (ContributionTrackerState *)lfirst(lc);
ContributionTrackerState *tracker = (ContributionTrackerState *)lfirst(cell);
aid_t aid = tracker->aid_descriptor.make_aid(PG_GETARG_DATUM(aid_index));
contribution_tracker_update_contribution(tracker, aid, one_contribution);
}
Expand All @@ -74,15 +75,16 @@ Datum anon_count_any_transfn(PG_FUNCTION_ARGS)

Assert(PG_NARGS() == list_length(trackers) + COUNT_ANY_AIDS_OFFSET);

ListCell *lc;
foreach (lc, trackers)
ListCell *cell;
foreach (cell, trackers)
{
int aid_index = foreach_current_index(lc) + COUNT_ANY_AIDS_OFFSET;
int aid_index = foreach_current_index(cell) + COUNT_ANY_AIDS_OFFSET;
if (!PG_ARGISNULL(aid_index))
{
ContributionTrackerState *tracker = (ContributionTrackerState *)lfirst(lc);
ContributionTrackerState *tracker = (ContributionTrackerState *)lfirst(cell);
aid_t aid = tracker->aid_descriptor.make_aid(PG_GETARG_DATUM(aid_index));
if (PG_ARGISNULL(1))
if (PG_ARGISNULL(VALUE_INDEX))
/* count argument is NULL, so no contribution, only keep track of the AID value */
contribution_tracker_update_aid(tracker, aid);
else
contribution_tracker_update_contribution(tracker, aid, one_contribution);
Expand Down Expand Up @@ -148,13 +150,13 @@ static Datum explain_count_trackers(List *trackers)
StringInfoData string;
initStringInfo(&string);

ListCell *lc;
foreach (lc, trackers)
ListCell *cell;
foreach (cell, trackers)
{
if (foreach_current_index(lc) > 0)
if (foreach_current_index(cell) > 0)
appendStringInfo(&string, " \n");

ContributionTrackerState *tracker = (ContributionTrackerState *)lfirst(lc);
ContributionTrackerState *tracker = (ContributionTrackerState *)lfirst(cell);
append_tracker_info(&string, tracker);
}

Expand Down Expand Up @@ -240,8 +242,8 @@ void accumulate_count_result(CountResultAccumulator *accumulator, const CountRes
if (flattening >= accumulator->max_flattening)
{
accumulator->max_flattening = flattening;
/* Get the largest flattened count from the ones with the maximum flattening. */
accumulator->max_flattened_count = Max(accumulator->max_flattened_count, result->flattened_count);
accumulator->max_flattened_count_with_max_flattening = Max(accumulator->max_flattened_count_with_max_flattening,
result->flattened_count);
}

if (result->noise_sigma > accumulator->max_noise_sigma)
Expand All @@ -253,17 +255,17 @@ void accumulate_count_result(CountResultAccumulator *accumulator, const CountRes

int64 finalize_count_result(const CountResultAccumulator *accumulator)
{
return Max(accumulator->max_flattened_count + accumulator->noise_with_max_sigma, 0);
return Max(accumulator->max_flattened_count_with_max_flattening + accumulator->noise_with_max_sigma, 0);
}

static Datum count_calculate_final(PG_FUNCTION_ARGS, List *trackers)
{
CountResultAccumulator result_accumulator = {0};

ListCell *lc;
foreach (lc, trackers)
ListCell *cell;
foreach (cell, trackers)
{
ContributionTrackerState *tracker = (ContributionTrackerState *)lfirst(lc);
ContributionTrackerState *tracker = (ContributionTrackerState *)lfirst(cell);
CountResult result = count_calculate_aid_result(tracker);

if (result.low_count)
Expand Down
Loading