Skip to content

Commit

Permalink
Consider column widths when estimating the cost of scanning.
Browse files Browse the repository at this point in the history
  • Loading branch information
pykello committed May 11, 2015
1 parent 5cc7013 commit 5006da5
Show file tree
Hide file tree
Showing 3 changed files with 142 additions and 29 deletions.
167 changes: 140 additions & 27 deletions cstore_fdw.c
Expand Up @@ -47,6 +47,7 @@
#include "utils/memutils.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
#include "utils/syscache.h"


/* local functions forward declarations */
Expand Down Expand Up @@ -83,6 +84,9 @@ static void CStoreGetForeignRelSize(PlannerInfo *root, RelOptInfo *baserel,
Oid foreignTableId);
static void CStoreGetForeignPaths(PlannerInfo *root, RelOptInfo *baserel,
Oid foreignTableId);
static double QueryPageCountEstimate(RelOptInfo *baserel, Oid foreignTableId);
static double RandomPageEstimate(RelOptInfo *baserel, Oid foreignTableId);
static double AttributeNullFraction(Oid relationId, AttrNumber attributeNumber);
static ForeignScan * CStoreGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel,
Oid foreignTableId, ForeignPath *bestPath,
List *targetList, List *scanClauses);
Expand Down Expand Up @@ -1091,12 +1095,89 @@ CStoreGetForeignPaths(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId
{
Path *foreignScanPath = NULL;
CStoreFdwOptions *cstoreFdwOptions = CStoreGetOptions(foreignTableId);
Relation relation = heap_open(foreignTableId, AccessShareLock);

double queryPageCountEstimate = QueryPageCountEstimate(baserel, foreignTableId);
double randomAccessEstimate = RandomPageEstimate(baserel, foreignTableId);
double totalDiskAccessCost = seq_page_cost * queryPageCountEstimate +
random_page_cost * randomAccessEstimate;

/*
* We skip reading columns that are not in query. Here we assume that all
columns in relation have the same width, and estimate the number of pages
* that will be read by query.
* We estimate costs almost the same way as cost_seqscan(), thus assuming
* that I/O costs are equivalent to a regular table file of the same size.
*/
double tupleCountEstimate = TupleCountEstimate(baserel, cstoreFdwOptions->filename);
double filterCostPerTuple = baserel->baserestrictcost.per_tuple;
double cpuCostPerTuple = cpu_tuple_cost + filterCostPerTuple;
double totalCpuCost = cpuCostPerTuple * tupleCountEstimate;

double startupCost = baserel->baserestrictcost.startup;
double totalCost = startupCost + totalCpuCost + totalDiskAccessCost;

/* create a foreign path node and add it as the only possible path */
foreignScanPath = (Path *) create_foreignscan_path(root, baserel, baserel->rows,
startupCost, totalCost,
NIL, /* no known ordering */
NULL, /* not parameterized */
NIL); /* no fdw_private */

add_path(baserel, foreignScanPath);
}


/*
* QueryPageCountEstimate returns estimated number of pages of the given cstore
* table that a query with given baserel will read.
*/
static double
QueryPageCountEstimate(RelOptInfo *baserel, Oid foreignTableId)
{
double queryPageCountEstimate = 0.0;
double queryColumnRatio = 0.0;
double relationColumnWidth = 0;
double queryColumnWidth = 0;
int32 columnIndex = 0;
BlockNumber relationPageCount = 0;
CStoreFdwOptions *cstoreFdwOptions = CStoreGetOptions(foreignTableId);

AttrNumber columnCount = baserel->max_attr;
List *queryColumnList = ColumnList(baserel);
bool *projectedColumnMask = ProjectedColumnMask(columnCount, queryColumnList);

/* calculate relation column width and query column width */
for (columnIndex = 0; columnIndex < columnCount; columnIndex++)
{
AttrNumber columnAttributeNumber = columnIndex + 1;
double nullFraction = AttributeNullFraction(foreignTableId,
columnAttributeNumber);

int32 columnWidth = get_attavgwidth(foreignTableId, columnAttributeNumber);
if (columnWidth == 0)
{
/* if not analyzed yet, assume all columns have same width */
relationColumnWidth++;

if (projectedColumnMask[columnIndex])
{
queryColumnWidth++;
}
}
else
{
/*
* We don't store anything for NULL values, and get_attavgwidth()
* reports the average width of non-NULL entries only, so we scale the
* width by the fraction of non-NULL values.
*/
relationColumnWidth += columnWidth * (1.0 - nullFraction);

if (projectedColumnMask[columnIndex])
{
queryColumnWidth += columnWidth * (1.0 - nullFraction);
}
}
}

/*
* We skip reading columns that are not in query.
*
* Ideally, we should also take into account the row blocks that will be
* suppressed. But for that we need to know which columns are used for
Expand All @@ -1109,37 +1190,69 @@ CStoreGetForeignPaths(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId
* algorithm and using the correlation statistics to detect which columns
* are stored in sorted order.
*/
List *queryColumnList = ColumnList(baserel);
uint32 queryColumnCount = list_length(queryColumnList);
BlockNumber relationPageCount = PageCount(cstoreFdwOptions->filename);
uint32 relationColumnCount = RelationGetNumberOfAttributes(relation);
relationPageCount = PageCount(cstoreFdwOptions->filename);
queryColumnRatio = queryColumnWidth / relationColumnWidth;
queryPageCountEstimate = relationPageCount * queryColumnRatio;

double queryColumnRatio = (double) queryColumnCount / relationColumnCount;
double queryPageCount = relationPageCount * queryColumnRatio;
double totalDiskAccessCost = seq_page_cost * queryPageCount;
return queryPageCountEstimate;
}

double tupleCountEstimate = TupleCountEstimate(baserel, cstoreFdwOptions->filename);

/*
* RandomPageEstimate estimates the number of random page accesses we need to do
* when reading a cstore file.
*/
static double
RandomPageEstimate(RelOptInfo *baserel, Oid foreignTableId)
{
double randomPageEstimate = 0.0;
double tupleCountEstimate = 0.0;
double stripeCountEstimate = 0.0;

AttrNumber columnCount = baserel->max_attr;
CStoreFdwOptions *cstoreFdwOptions = CStoreGetOptions(foreignTableId);

tupleCountEstimate = TupleCountEstimate(baserel, cstoreFdwOptions->filename);
stripeCountEstimate = tupleCountEstimate / cstoreFdwOptions->stripeRowCount + 1;

/*
* We estimate costs almost the same way as cost_seqscan(), thus assuming
* that I/O costs are equivalent to a regular table file of the same size.
* The first step when reading a stripe is to read its footer, then we need
* to seek back to read the stripe skip list. This adds two random
* page accesses per stripe.
*/
double filterCostPerTuple = baserel->baserestrictcost.per_tuple;
double cpuCostPerTuple = cpu_tuple_cost + filterCostPerTuple;
double totalCpuCost = cpuCostPerTuple * tupleCountEstimate;
randomPageEstimate += 2.0 * stripeCountEstimate;

double startupCost = baserel->baserestrictcost.startup;
double totalCost = startupCost + totalCpuCost + totalDiskAccessCost;
/*
* When we finish reading a column, we move to the beginning of next column.
* Since we skip unused columns, this is a random access in most cases.
*/
randomPageEstimate += stripeCountEstimate * (double) columnCount;

/* create a foreign path node and add it as the only possible path */
foreignScanPath = (Path *) create_foreignscan_path(root, baserel, baserel->rows,
startupCost, totalCost,
NIL, /* no known ordering */
NULL, /* not parameterized */
NIL); /* no fdw_private */
return randomPageEstimate;
}

add_path(baserel, foreignScanPath);
heap_close(relation, AccessShareLock);

/*
 * AttributeNullFraction looks up the statistics cache entry for the given
 * attribute of the given relation and returns the NULL fraction recorded in
 * it. When no entry is found (for example, because the relation has not been
 * analyzed yet), the function returns 0.
 */
static double
AttributeNullFraction(Oid relationId, AttrNumber attributeNumber)
{
	Form_pg_statistic statisticForm = NULL;
	double nullFraction = 0.0;

	/* third cache key is false; presumably selects the non-inherited stats row */
	HeapTuple statisticTuple = SearchSysCache3(STATRELATTINH,
											   ObjectIdGetDatum(relationId),
											   Int16GetDatum(attributeNumber),
											   BoolGetDatum(false));

	/* nothing cached for this attribute, so report that no values are NULL */
	if (!HeapTupleIsValid(statisticTuple))
	{
		return nullFraction;
	}

	statisticForm = (Form_pg_statistic) GETSTRUCT(statisticTuple);
	nullFraction = statisticForm->stanullfrac;

	/* copy the value out before handing the tuple back to the cache */
	ReleaseSysCache(statisticTuple);

	return nullFraction;
}


Expand Down
1 change: 1 addition & 0 deletions cstore_fdw.h
Expand Up @@ -326,6 +326,7 @@ extern bool CStoreReadFinished(TableReadState *state);
extern bool CStoreReadNextRow(TableReadState *state, Datum *columnValues,
bool *columnNulls);
extern void CStoreEndRead(TableReadState *state);
extern bool * ProjectedColumnMask(uint32 columnCount, List *projectedColumnList);

/* Function declarations for common functions */
extern FmgrInfo * GetFunctionInfoOrNull(Oid typeId, Oid accessMethodId,
Expand Down
3 changes: 1 addition & 2 deletions cstore_reader.c
Expand Up @@ -66,7 +66,6 @@ static void UpdateConstraint(Node *baseConstraint, Datum minValue, Datum maxValu
static StripeSkipList * SelectedBlockSkipList(StripeSkipList *stripeSkipList,
bool *selectedBlockMask);
static uint32 StripeSkipListRowCount(StripeSkipList *stripeSkipList);
static bool * ProjectedColumnMask(uint32 columnCount, List *projectedColumnList);
static void DeserializeBoolArray(StringInfo boolArrayBuffer, bool *boolArray,
uint32 boolArrayLength);
static void DeserializeDatumArray(StringInfo datumBuffer, bool *existsArray,
Expand Down Expand Up @@ -939,7 +938,7 @@ StripeSkipListRowCount(StripeSkipList *stripeSkipList)
* ProjectedColumnMask returns a boolean array in which the projected columns
* from the projected column list are marked as true.
*/
static bool *
bool *
ProjectedColumnMask(uint32 columnCount, List *projectedColumnList)
{
bool *projectedColumnMask = palloc0(columnCount * sizeof(bool));
Expand Down

0 comments on commit 5006da5

Please sign in to comment.