Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature issue#16 #73

Merged
merged 10 commits into from
Feb 25, 2015
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
97 changes: 89 additions & 8 deletions create_shards.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "create_shards.h"
#include "ddl_commands.h"
#include "distribution_metadata.h"
#include "prune_shard_list.h"

#include <ctype.h>
#include <limits.h>
Expand All @@ -29,8 +30,13 @@
#include <string.h>

#include "access/attnum.h"
#include "access/hash.h"
#include "access/nbtree.h"
#include "access/skey.h"
#include "catalog/namespace.h"
#include "catalog/pg_class.h"
#include "catalog/pg_am.h"
#include "commands/defrem.h"
#include "lib/stringinfo.h"
#include "nodes/pg_list.h"
#include "nodes/primnodes.h"
Expand All @@ -50,6 +56,8 @@ static List * ParseWorkerNodeFile(char *workerNodeFilename);
static int CompareWorkerNodes(const void *leftElement, const void *rightElement);
static bool ExecuteRemoteCommand(PGconn *connection, const char *sqlCommand);
static text * IntegerToText(int32 value);
static Oid SupportFunctionForColumn(Var* partitionColumn, Oid accessMethodId,
int16 supportFunctionNumber);


/* declarations for dynamic loading */
Expand All @@ -70,9 +78,9 @@ master_create_distributed_table(PG_FUNCTION_ARGS)
char partitionMethod = PG_GETARG_CHAR(2);
Oid distributedTableId = ResolveRelationId(tableNameText);
char relationKind = '\0';
AttrNumber partitionColumnId = InvalidAttrNumber;
char *partitionColumnName = text_to_cstring(partitionColumnText);
char *tableName = text_to_cstring(tableNameText);
Var *partitionColumnVar = NULL;

/* verify target relation is either regular or foreign table */
relationKind = get_rel_relkind(distributedTableId);
Expand All @@ -84,17 +92,53 @@ master_create_distributed_table(PG_FUNCTION_ARGS)
"foreign tables.")));
}

/* verify column exists in given table */
partitionColumnId = get_attnum(distributedTableId, partitionColumnName);
if (partitionColumnId == InvalidAttrNumber)
/* ColumnNameToColumn verifies column exists in given table */
partitionColumnVar = ColumnNameToColumn(distributedTableId, partitionColumnName);

if (partitionMethod != HASH_PARTITION_TYPE)
{
ereport(ERROR, (errmsg("could not find column: %s", partitionColumnName)));
/* we only support hash partitioning right now */
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("pg_shard only supports hash partitioning")));
}

/* we only support hash partitioning method for now */
if (partitionMethod != HASH_PARTITION_TYPE)
/* depending on the partition type, check for the existence of support function */
if (partitionMethod == HASH_PARTITION_TYPE)
{
ereport(ERROR, (errmsg("unsupported partition method: %c", partitionMethod)));
Oid hashSupportFunction = SupportFunctionForColumn(partitionColumnVar,
HASH_AM_OID, HASHPROC);
if (hashSupportFunction == InvalidOid)
{
Oid partitionColumnTypeId = partitionColumnVar->vartype;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see the point in this variable existing, so I'm going to remove it before merging (it's only referenced once).


ereport(ERROR, (errcode(ERRCODE_UNDEFINED_FUNCTION),
errmsg("could not identify a hash function for type %s",
format_type_be(partitionColumnTypeId)),
errdetail("Partition column types must have a hash function "
"defined to use hash partitioning.")));
}
}
else if (partitionMethod == RANGE_PARTITION_TYPE)
{
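/*
* RANGE_PARTITION_TYPE is not supported yet, so this branch is currently
* unreachable: any non-hash partition method has already been rejected
* above. It is kept for completeness and verifies that the partition
* column's type has the btree comparison support function that range
* partitioning would need.
* TODO: Add regression tests for this check when RANGE_PARTITION_TYPE is
* supported.
*/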
Oid btreeSupportFunction = SupportFunctionForColumn(partitionColumnVar,
BTREE_AM_OID, BTORDER_PROC);
if (btreeSupportFunction == InvalidOid)
{
Oid partitionColumnTypeId = partitionColumnVar->vartype;

ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_FUNCTION),
errmsg("could not identify a comparison function for type %s",
format_type_be(partitionColumnTypeId)),
errdetail("Partition column types must have a comparison function "
"defined to use range partitioning.")));
}
}

/* insert row into the partition metadata table */
Expand Down Expand Up @@ -534,3 +578,40 @@ IntegerToText(int32 value)

return valueText;
}


/*
* SupportFunctionForColumn returns the OID of the support function identified by
* supportFunctionNumber in the default operator class that the given access method
* (accessMethodId) has for the data type of the column (partitionColumn->vartype).
* The result of get_opfamily_proc is returned unchanged; callers in this file treat
* InvalidOid as "no such support function". This function raises an ERROR if the
* data type of the column has no default operator class for the access method.
*/
Oid
SupportFunctionForColumn(Var *partitionColumn, Oid accessMethodId,
int16 supportFunctionNumber)
{
Oid operatorFamilyId = InvalidOid;
Oid supportFunctionOid = InvalidOid;
Oid columnOid = partitionColumn->vartype;
Oid operatorClassId = GetDefaultOpClass(columnOid, accessMethodId);

/*
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment is actually incorrect, I think… This documentation section shows that an index can be defined using a custom operator class rather than the default. So it's possible we could support types without default operator classes, so long as the user specified which custom operator class to use at the time of distribution. I'll update this comment to clarify that we just don't support anything other than using a default operator class at the moment.

* Only the default operator class of the column's data type is consulted at
* the moment; there is no way to name a custom operator class when
* distributing a table. If the type has no default operator class for this
* access method, we cannot look up a support function, so we error out.
*/
if (operatorClassId == InvalidOid)
{
ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT),
errmsg("data type %s has no default operator class for specified"
" partition method", format_type_be(columnOid)),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Slight alignment issue here. I'll fix before merge.

errdetail("Partition column types must have a default operator"
" class defined.")));
}

/*
* Map the operator class to its operator family, then fetch the requested
* support function registered in that family for (columnOid, columnOid).
*/
operatorFamilyId = get_opclass_family(operatorClassId);
supportFunctionOid = get_opfamily_proc(operatorFamilyId, columnOid, columnOid,
supportFunctionNumber);

return supportFunctionOid;
}
3 changes: 1 addition & 2 deletions distribution_metadata.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ static List *ShardIntervalListCache = NIL;


/* local function forward declarations */
static Var * ColumnNameToColumn(Oid relationId, char *columnName);
static void LoadShardIntervalRow(int64 shardId, Oid *relationId,
char **minValue, char **maxValue);
static ShardPlacement * TupleToShardPlacement(HeapTuple heapTuple,
Expand Down Expand Up @@ -445,7 +444,7 @@ IsDistributedTable(Oid tableId)
* a Var that represents that column in that relation. This function throws an
* error if the column doesn't exist or is a system column.
*/
static Var *
Var *
ColumnNameToColumn(Oid relationId, char *columnName)
{
Var *partitionColumn = NULL;
Expand Down
2 changes: 2 additions & 0 deletions distribution_metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@

/* denotes partition type of the distributed table */
#define HASH_PARTITION_TYPE 'h'
#define RANGE_PARTITION_TYPE 'r'

/* human-readable names for addressing columns of partition table */
#define PARTITION_TABLE_ATTRIBUTE_COUNT 3
Expand Down Expand Up @@ -139,6 +140,7 @@ extern List * LoadShardPlacementList(int64 shardId);
extern Var * PartitionColumn(Oid distributedTableId);
extern char PartitionType(Oid distributedTableId);
extern bool IsDistributedTable(Oid tableId);
extern Var * ColumnNameToColumn(Oid relationId, char *columnName);
extern void InsertPartitionRow(Oid distributedTableId, char partitionType,
text *partitionKeyText);
extern void InsertShardRow(Oid distributedTableId, uint64 shardId, char shardStorage,
Expand Down
37 changes: 34 additions & 3 deletions expected/create_shards.out.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,53 @@ CREATE FUNCTION create_table_then_fail(cstring, integer)
AS 'pg_shard'
LANGUAGE C STRICT;
-- ===================================================================
-- create test type, operator, operator family and operator class
-- ===================================================================
CREATE TYPE dummy_type AS (
i integer
);
CREATE FUNCTION dummy_type_function(dummy_type, dummy_type) RETURNS boolean
AS 'SELECT TRUE;'
LANGUAGE SQL
IMMUTABLE
RETURNS NULL ON NULL INPUT;
CREATE OPERATOR = (
LEFTARG = dummy_type,
RIGHTARG = dummy_type,
PROCEDURE = dummy_type_function
);
CREATE OPERATOR FAMILY dummy_op_family USING hash;
-- create operator class with no support function
CREATE OPERATOR CLASS dummy_op_family_class
DEFAULT FOR TYPE dummy_type USING hash FAMILY dummy_op_family AS
OPERATOR 1 =;
-- ===================================================================
-- test shard creation functionality
-- ===================================================================
CREATE TABLE table_to_distribute (
name text,
id bigint PRIMARY KEY
id bigint PRIMARY KEY,
json_data json,
test_type_data dummy_type
);
-- use an index instead of table name
SELECT master_create_distributed_table('table_to_distribute_pkey', 'id');
ERROR: cannot distribute relation: "table_to_distribute_pkey"
DETAIL: Distributed relations must be regular or foreign tables.
-- use a bad column name
SELECT master_create_distributed_table('table_to_distribute', 'bad_column');
ERROR: could not find column: bad_column
ERROR: partition column "bad_column" not found
-- use unsupported partition type
SELECT master_create_distributed_table('table_to_distribute', 'name', 'r');
ERROR: unsupported partition method: r
ERROR: pg_shard only supports hash partitioning
-- use unsupported partition column which does not have any default op class
SELECT master_create_distributed_table('table_to_distribute', 'json_data');
ERROR: data type json has no default operator class for specified partition method
DETAIL: Partition column types must have a default operator class defined.
-- use unsupported partition column which does not have required support functions
SELECT master_create_distributed_table('table_to_distribute', 'test_type_data');
ERROR: could not identify a hash function for type dummy_type
DETAIL: Partition column types must have a hash function defined to use hash partitioning.
-- distribute table and inspect side effects
SELECT master_create_distributed_table('table_to_distribute', 'name');
master_create_distributed_table
Expand Down
3 changes: 1 addition & 2 deletions prune_shard_list.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ static List *OperatorIdCache = NIL;

/* local function forward declarations */
static Oid LookupOperatorByType(Oid typeId, Oid accessMethodId, int16 strategyNumber);
static Oid GetOperatorByType(Oid typeId, Oid accessMethodId, int16 strategyNumber);
static bool SimpleOpExpression(Expr *clause);
static Node * HashableClauseMutator(Node *originalNode, Var *partitionColumn);
static bool OpExpressionContainsColumn(OpExpr *operatorExpression, Var *partitionColumn);
Expand Down Expand Up @@ -305,7 +304,7 @@ LookupOperatorByType(Oid typeId, Oid accessMethodId, int16 strategyNumber)
* GetOperatorByType returns the operator oid for the given type, access
* method, and strategy number.
*/
static Oid
Oid
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we're starting to see we might want to move some of these common functions out into their own file.

GetOperatorByType(Oid typeId, Oid accessMethodId, int16 strategyNumber)
{
/* Get default operator class from pg_opclass */
Expand Down
1 change: 1 addition & 0 deletions prune_shard_list.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ typedef struct OperatorIdCacheEntry
extern List * PruneShardList(Oid relationId, List *whereClauseList,
List *shardIntervalList);
extern OpExpr * MakeOpExpression(Var *variable, int16 strategyNumber);
extern Oid GetOperatorByType(Oid typeId, Oid accessMethodId, int16 strategyNumber);


#endif /* PG_SHARD_PRUNE_SHARD_LIST_H */
35 changes: 34 additions & 1 deletion sql/create_shards.sql.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,40 @@ CREATE FUNCTION create_table_then_fail(cstring, integer)
AS 'pg_shard'
LANGUAGE C STRICT;

-- ===================================================================
-- create test type, operator, operator family and operator class
-- ===================================================================
CREATE TYPE dummy_type AS (
i integer
);

CREATE FUNCTION dummy_type_function(dummy_type, dummy_type) RETURNS boolean
AS 'SELECT TRUE;'
LANGUAGE SQL
IMMUTABLE
RETURNS NULL ON NULL INPUT;

CREATE OPERATOR = (
LEFTARG = dummy_type,
RIGHTARG = dummy_type,
PROCEDURE = dummy_type_function
);
CREATE OPERATOR FAMILY dummy_op_family USING hash;

-- create operator class with no support function
CREATE OPERATOR CLASS dummy_op_family_class
DEFAULT FOR TYPE dummy_type USING hash FAMILY dummy_op_family AS
OPERATOR 1 =;

-- ===================================================================
-- test shard creation functionality
-- ===================================================================

CREATE TABLE table_to_distribute (
name text,
id bigint PRIMARY KEY
id bigint PRIMARY KEY,
json_data json,
test_type_data dummy_type
);

-- use an index instead of table name
Expand All @@ -30,6 +57,12 @@ SELECT master_create_distributed_table('table_to_distribute', 'bad_column');
-- use unsupported partition type
SELECT master_create_distributed_table('table_to_distribute', 'name', 'r');

-- use unsupported partition column which does not have any default op class
SELECT master_create_distributed_table('table_to_distribute', 'json_data');

-- use unsupported partition column which does not have required support functions
SELECT master_create_distributed_table('table_to_distribute', 'test_type_data');

-- distribute table and inspect side effects
SELECT master_create_distributed_table('table_to_distribute', 'name');
SELECT partition_method, key FROM pgs_distribution_metadata.partition
Expand Down