Skip to content

Commit

Permalink
Measure the number of all-visible pages for use in index-only scan co…
Browse files Browse the repository at this point in the history
…sting.

Add a column pg_class.relallvisible to remember the number of pages that
were all-visible according to the visibility map as of the last VACUUM
(or ANALYZE, or some other operations that update pg_class.relpages).
Use relallvisible/relpages, instead of an arbitrary constant, to estimate
how many heap page fetches can be avoided during an index-only scan.

This is pretty primitive and will no doubt see refinements once we've
acquired more field experience with the index-only scan mechanism, but
it's way better than using a constant.

Note: I had to adjust an underspecified query in the window.sql regression
test, because it was changing answers when the plan changed to use an
index-only scan.  Some of the adjacent tests perhaps should be adjusted
as well, but I didn't do that here.
  • Loading branch information
tglsfdc committed Oct 14, 2011
1 parent dea95c7 commit e6858e6
Show file tree
Hide file tree
Showing 22 changed files with 246 additions and 72 deletions.
13 changes: 13 additions & 0 deletions doc/src/sgml/catalogs.sgml
Original file line number Diff line number Diff line change
Expand Up @@ -1654,6 +1654,19 @@
</entry>
</row>

<row>
<entry><structfield>relallvisible</structfield></entry>
<entry><type>int4</type></entry>
<entry></entry>
<entry>
Number of pages that are marked all-visible in the table's
visibility map. This is only an estimate used by the
planner. It is updated by <command>VACUUM</command>,
<command>ANALYZE</command>, and a few DDL commands such as
<command>CREATE INDEX</command>.
</entry>
</row>

<row>
<entry><structfield>reltoastrelid</structfield></entry>
<entry><type>oid</type></entry>
Expand Down
3 changes: 2 additions & 1 deletion src/backend/access/hash/hash.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ hashbuild(PG_FUNCTION_ARGS)
IndexBuildResult *result;
BlockNumber relpages;
double reltuples;
double allvisfrac;
uint32 num_buckets;
HashBuildState buildstate;

Expand All @@ -67,7 +68,7 @@ hashbuild(PG_FUNCTION_ARGS)
RelationGetRelationName(index));

/* Estimate the number of rows currently present in the table */
estimate_rel_size(heap, NULL, &relpages, &reltuples);
estimate_rel_size(heap, NULL, &relpages, &reltuples, &allvisfrac);

/* Initialize the hash index metadata page and initial buckets */
num_buckets = _hash_metapinit(index, reltuples, MAIN_FORKNUM);
Expand Down
68 changes: 68 additions & 0 deletions src/backend/access/heap/visibilitymap.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
* visibilitymap_pin_ok - check whether correct map page is already pinned
* visibilitymap_set - set a bit in a previously pinned page
* visibilitymap_test - test if a bit is set
* visibilitymap_count - count number of bits set in visibility map
* visibilitymap_truncate - truncate the visibility map
*
* NOTES
*
Expand Down Expand Up @@ -110,6 +112,26 @@
#define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
#define HEAPBLK_TO_MAPBIT(x) ((x) % HEAPBLOCKS_PER_BYTE)

/*
 * Lookup table for fast counting of set bits: number_of_ones[b] is the
 * number of 1 bits in the byte value b (0 .. 255).  visibilitymap_count()
 * uses it to total the set bits of a map page one byte at a time.
 *
 * Each row below covers 16 consecutive byte values, starting at 0.
 */
static const uint8 number_of_ones[256] = {
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
};

/* prototypes for internal routines */
static Buffer vm_readbuf(Relation rel, BlockNumber blkno, bool extend);
static void vm_extend(Relation rel, BlockNumber nvmblocks);
Expand Down Expand Up @@ -307,6 +329,52 @@ visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf)
return result;
}

/*
 * visibilitymap_count - total up the bits that are set in the visibility map
 *
 * Returns the number of set bits found across all pages of rel's
 * visibility map.
 *
 * Concurrent extension of the table is deliberately not guarded against:
 * pages added while we run will not be marked all-visible, so they cannot
 * change the answer.
 */
BlockNumber
visibilitymap_count(Relation rel)
{
	BlockNumber nset = 0;
	BlockNumber blkno = 0;

	for (;;)
	{
		Buffer		buf;
		const unsigned char *cur;
		const unsigned char *stop;

		/*
		 * Keep reading map pages until there are none left.  Unused bytes
		 * at the tail of the final page are assumed to be zero, so they are
		 * simply counted along with everything else.
		 */
		buf = vm_readbuf(rel, blkno, false);
		if (!BufferIsValid(buf))
			break;

		/*
		 * No page lock is taken.  Anyone setting or clearing bits
		 * concurrently would make the result stale immediately anyway, and
		 * an approximate value is all that is needed.
		 */
		cur = (const unsigned char *) PageGetContents(BufferGetPage(buf));
		stop = cur + MAPSIZE;

		/* Add up the set bits one byte at a time via the lookup table. */
		while (cur < stop)
		{
			nset += number_of_ones[*cur];
			cur++;
		}

		ReleaseBuffer(buf);
		blkno++;
	}

	return nset;
}

/*
* visibilitymap_truncate - truncate the visibility map
*
Expand Down
4 changes: 4 additions & 0 deletions src/backend/catalog/heap.c
Original file line number Diff line number Diff line change
Expand Up @@ -772,6 +772,7 @@ InsertPgClassTuple(Relation pg_class_desc,
values[Anum_pg_class_reltablespace - 1] = ObjectIdGetDatum(rd_rel->reltablespace);
values[Anum_pg_class_relpages - 1] = Int32GetDatum(rd_rel->relpages);
values[Anum_pg_class_reltuples - 1] = Float4GetDatum(rd_rel->reltuples);
values[Anum_pg_class_relallvisible - 1] = Int32GetDatum(rd_rel->relallvisible);
values[Anum_pg_class_reltoastrelid - 1] = ObjectIdGetDatum(rd_rel->reltoastrelid);
values[Anum_pg_class_reltoastidxid - 1] = ObjectIdGetDatum(rd_rel->reltoastidxid);
values[Anum_pg_class_relhasindex - 1] = BoolGetDatum(rd_rel->relhasindex);
Expand Down Expand Up @@ -845,16 +846,19 @@ AddNewRelationTuple(Relation pg_class_desc,
/* The relation is real, but as yet empty */
new_rel_reltup->relpages = 0;
new_rel_reltup->reltuples = 0;
new_rel_reltup->relallvisible = 0;
break;
case RELKIND_SEQUENCE:
/* Sequences always have a known size */
new_rel_reltup->relpages = 1;
new_rel_reltup->reltuples = 1;
new_rel_reltup->relallvisible = 0;
break;
default:
/* Views, etc, have no disk storage */
new_rel_reltup->relpages = 0;
new_rel_reltup->reltuples = 0;
new_rel_reltup->relallvisible = 0;
break;
}

Expand Down
52 changes: 36 additions & 16 deletions src/backend/catalog/index.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "access/relscan.h"
#include "access/sysattr.h"
#include "access/transam.h"
#include "access/visibilitymap.h"
#include "access/xact.h"
#include "bootstrap/bootstrap.h"
#include "catalog/catalog.h"
Expand Down Expand Up @@ -1059,7 +1060,7 @@ index_create(Relation heapRelation,
true,
isprimary,
InvalidOid,
heapRelation->rd_rel->reltuples);
-1.0);
/* Make the above update visible */
CommandCounterIncrement();
}
Expand Down Expand Up @@ -1225,7 +1226,7 @@ index_constraint_create(Relation heapRelation,
true,
true,
InvalidOid,
heapRelation->rd_rel->reltuples);
-1.0);

/*
* If needed, mark the index as primary and/or deferred in pg_index.
Expand Down Expand Up @@ -1533,9 +1534,10 @@ FormIndexDatum(IndexInfo *indexInfo,
* isprimary: if true, set relhaspkey true; else no change
* reltoastidxid: if not InvalidOid, set reltoastidxid to this value;
* else no change
* reltuples: set reltuples to this value
* reltuples: if >= 0, set reltuples to this value; else no change
*
* relpages is also updated (using RelationGetNumberOfBlocks()).
* If reltuples >= 0, relpages and relallvisible are also updated (using
* RelationGetNumberOfBlocks() and visibilitymap_count()).
*
* NOTE: an important side-effect of this operation is that an SI invalidation
* message is sent out to all backends --- including me --- causing relcache
Expand All @@ -1550,7 +1552,6 @@ index_update_stats(Relation rel,
bool hasindex, bool isprimary,
Oid reltoastidxid, double reltuples)
{
BlockNumber relpages = RelationGetNumberOfBlocks(rel);
Oid relid = RelationGetRelid(rel);
Relation pg_class;
HeapTuple tuple;
Expand Down Expand Up @@ -1586,9 +1587,11 @@ index_update_stats(Relation rel,
* It is safe to use a non-transactional update even though our
* transaction could still fail before committing. Setting relhasindex
* true is safe even if there are no indexes (VACUUM will eventually fix
* it), likewise for relhaspkey. And of course the relpages and reltuples
* counts are correct (or at least more so than the old values)
* regardless.
* it), likewise for relhaspkey. And of course the new relpages and
* reltuples counts are correct regardless. However, we don't want to
* change relpages (or relallvisible) if the caller isn't providing an
* updated reltuples count, because that would bollix the
* reltuples/relpages ratio which is what's really important.
*/

pg_class = heap_open(RelationRelationId, RowExclusiveLock);
Expand Down Expand Up @@ -1650,15 +1653,32 @@ index_update_stats(Relation rel,
dirty = true;
}
}
if (rd_rel->reltuples != (float4) reltuples)
{
rd_rel->reltuples = (float4) reltuples;
dirty = true;
}
if (rd_rel->relpages != (int32) relpages)

if (reltuples >= 0)
{
rd_rel->relpages = (int32) relpages;
dirty = true;
BlockNumber relpages = RelationGetNumberOfBlocks(rel);
BlockNumber relallvisible;

if (rd_rel->relkind != RELKIND_INDEX)
relallvisible = visibilitymap_count(rel);
else /* don't bother for indexes */
relallvisible = 0;

if (rd_rel->relpages != (int32) relpages)
{
rd_rel->relpages = (int32) relpages;
dirty = true;
}
if (rd_rel->reltuples != (float4) reltuples)
{
rd_rel->reltuples = (float4) reltuples;
dirty = true;
}
if (rd_rel->relallvisible != (int32) relallvisible)
{
rd_rel->relallvisible = (int32) relallvisible;
dirty = true;
}
}

/*
Expand Down
11 changes: 9 additions & 2 deletions src/backend/commands/analyze.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "access/transam.h"
#include "access/tupconvert.h"
#include "access/tuptoaster.h"
#include "access/visibilitymap.h"
#include "access/xact.h"
#include "catalog/index.h"
#include "catalog/indexing.h"
Expand Down Expand Up @@ -534,7 +535,10 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh)
if (!inh)
vac_update_relstats(onerel,
RelationGetNumberOfBlocks(onerel),
totalrows, hasindex, InvalidTransactionId);
totalrows,
visibilitymap_count(onerel),
hasindex,
InvalidTransactionId);

/*
* Same for indexes. Vacuum always scans all indexes, so if we're part of
Expand All @@ -551,7 +555,10 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh)
totalindexrows = ceil(thisdata->tupleFract * totalrows);
vac_update_relstats(Irel[ind],
RelationGetNumberOfBlocks(Irel[ind]),
totalindexrows, false, InvalidTransactionId);
totalindexrows,
0,
false,
InvalidTransactionId);
}
}

Expand Down
5 changes: 5 additions & 0 deletions src/backend/commands/cluster.c
Original file line number Diff line number Diff line change
Expand Up @@ -1205,6 +1205,7 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
{
int4 swap_pages;
float4 swap_tuples;
int4 swap_allvisible;

swap_pages = relform1->relpages;
relform1->relpages = relform2->relpages;
Expand All @@ -1213,6 +1214,10 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
swap_tuples = relform1->reltuples;
relform1->reltuples = relform2->reltuples;
relform2->reltuples = swap_tuples;

swap_allvisible = relform1->relallvisible;
relform1->relallvisible = relform2->relallvisible;
relform2->relallvisible = swap_allvisible;
}

/*
Expand Down
6 changes: 6 additions & 0 deletions src/backend/commands/vacuum.c
Original file line number Diff line number Diff line change
Expand Up @@ -569,6 +569,7 @@ vac_estimate_reltuples(Relation relation, bool is_analyze,
void
vac_update_relstats(Relation relation,
BlockNumber num_pages, double num_tuples,
BlockNumber num_all_visible_pages,
bool hasindex, TransactionId frozenxid)
{
Oid relid = RelationGetRelid(relation);
Expand Down Expand Up @@ -599,6 +600,11 @@ vac_update_relstats(Relation relation,
pgcform->reltuples = (float4) num_tuples;
dirty = true;
}
if (pgcform->relallvisible != (int32) num_all_visible_pages)
{
pgcform->relallvisible = (int32) num_all_visible_pages;
dirty = true;
}
if (pgcform->relhasindex != hasindex)
{
pgcform->relhasindex = hasindex;
Expand Down
20 changes: 17 additions & 3 deletions src/backend/commands/vacuumlazy.c
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
TransactionId freezeTableLimit;
BlockNumber new_rel_pages;
double new_rel_tuples;
BlockNumber new_rel_allvisible;
TransactionId new_frozen_xid;

/* measure elapsed time iff autovacuum logging requires it */
Expand Down Expand Up @@ -222,6 +223,10 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
* density") with nonzero relpages and reltuples=0 (which means "zero
* tuple density") unless there's some actual evidence for the latter.
*
* We do update relallvisible even in the corner case, since if the
* table is all-visible we'd definitely like to know that. But clamp
* the value to be not more than what we're setting relpages to.
*
* Also, don't change relfrozenxid if we skipped any pages, since then
* we don't know for certain that all tuples have a newer xmin.
*/
Expand All @@ -233,12 +238,18 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
new_rel_tuples = vacrelstats->old_rel_tuples;
}

new_rel_allvisible = visibilitymap_count(onerel);
if (new_rel_allvisible > new_rel_pages)
new_rel_allvisible = new_rel_pages;

new_frozen_xid = FreezeLimit;
if (vacrelstats->scanned_pages < vacrelstats->rel_pages)
new_frozen_xid = InvalidTransactionId;

vac_update_relstats(onerel,
new_rel_pages, new_rel_tuples,
new_rel_pages,
new_rel_tuples,
new_rel_allvisible,
vacrelstats->hasindex,
new_frozen_xid);

Expand Down Expand Up @@ -1063,8 +1074,11 @@ lazy_cleanup_index(Relation indrel,
*/
if (!stats->estimated_count)
vac_update_relstats(indrel,
stats->num_pages, stats->num_index_tuples,
false, InvalidTransactionId);
stats->num_pages,
stats->num_index_tuples,
0,
false,
InvalidTransactionId);

ereport(elevel,
(errmsg("index \"%s\" now contains %.0f row versions in %u pages",
Expand Down
1 change: 1 addition & 0 deletions src/backend/nodes/outfuncs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1743,6 +1743,7 @@ _outRelOptInfo(StringInfo str, RelOptInfo *node)
WRITE_NODE_FIELD(indexlist);
WRITE_UINT_FIELD(pages);
WRITE_FLOAT_FIELD(tuples, "%.0f");
WRITE_FLOAT_FIELD(allvisfrac, "%.6f");
WRITE_NODE_FIELD(subplan);
WRITE_NODE_FIELD(subroot);
WRITE_NODE_FIELD(baserestrictinfo);
Expand Down
Loading

0 comments on commit e6858e6

Please sign in to comment.