Skip to content
Permalink
Browse files
HAWQ-1815. native orc supports udt
  • Loading branch information
ztao1987 committed Nov 18, 2021
1 parent 6c7c752 commit c82cf9f2a2f61859918f34401c6cad26cd912490
Show file tree
Hide file tree
Showing 4 changed files with 175 additions and 112 deletions.
@@ -337,36 +337,44 @@ Datum orc_validate_encodings(PG_FUNCTION_ARGS)
* void
* orc_validate_datatypes(TupleDesc tupDesc)
*/
Datum orc_validate_datatypes(PG_FUNCTION_ARGS)
{
PlugStorageValidator psv = (PlugStorageValidator) (fcinfo->context);
TupleDesc tup_desc = psv->tuple_desc;

for (int i = 0; i < tup_desc->natts; ++i)
{
int32_t datatype =
(int32_t) (((Form_pg_attribute) (tup_desc->attrs[i]))->atttypid);
int4 typmod = ((Form_pg_attribute) (tup_desc->attrs[i]))->atttypmod;

if (checkORCUnsupportedDataType(datatype))
{
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR), errmsg("unsupported data types %s for columns of external ORC table is specified.", TypeNameToString(makeTypeNameFromOid(datatype, -1))), errOmitLocation(true)));
}
if (HAWQ_TYPE_NUMERIC == datatype)
{
int4 tmp_typmod = typmod - VARHDRSZ;
int precision = (tmp_typmod >> 16) & 0xffff;
int scale = tmp_typmod & 0xffff;
if (precision < 1 || 38 < precision)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("ORC DECIMAL precision must be between 1 and 38")));
if (scale == 0)
ereport(NOTICE, (errmsg("Using a scale of zero for ORC DECIMAL")));
}
}
Datum orc_validate_datatypes(PG_FUNCTION_ARGS) {
PlugStorageValidator psv = (PlugStorageValidator)(fcinfo->context);
TupleDesc tup_desc = psv->tuple_desc;

for (int i = 0; i < tup_desc->natts; ++i) {
int32_t datatype =
(int32_t)(((Form_pg_attribute)(tup_desc->attrs[i]))->atttypid);
int4 typmod = ((Form_pg_attribute)(tup_desc->attrs[i]))->atttypmod;

if (checkORCUnsupportedDataType(datatype)) {
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("unsupported data types %s for columns of external ORC "
"table is specified.",
TypeNameToString(makeTypeNameFromOid(datatype, -1))),
errOmitLocation(true)));
}
if (HAWQ_TYPE_NUMERIC == datatype) {
int4 tmp_typmod = typmod - VARHDRSZ;
int precision = (tmp_typmod >> 16) & 0xffff;
int scale = tmp_typmod & 0xffff;
if (precision < 1 || 38 < precision)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("ORC DECIMAL precision must be between 1 and 38")));
if (scale == 0)
ereport(NOTICE, (errmsg("Using a scale of zero for ORC DECIMAL")));
}
if (HAEQ_TYPE_UDT(datatype))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("unsupported data types %s for columns of external ORC "
"table is specified.",
TypeNameToString(makeTypeNameFromOid(datatype, -1))),
errOmitLocation(true)));
}

PG_RETURN_VOID() ;
PG_RETURN_VOID();
}

/*
@@ -74,6 +74,7 @@ typedef struct OrcFormatData {
char **colRawValues;
uint64 *colValLength;
TimestampType *colTimestamp;
struct varlena **colFixedLenUDT;
} OrcFormatData;

static void initOrcFormatUserData(TupleDesc tup_desc,
@@ -86,8 +87,16 @@ static void initOrcFormatUserData(TupleDesc tup_desc,
orcFormatData->colRawValues = palloc0(sizeof(char *) * natts);
orcFormatData->colValLength = palloc0(sizeof(uint64) * natts);
orcFormatData->colTimestamp = palloc0(sizeof(TimestampType) * natts);
orcFormatData->colFixedLenUDT = palloc0(sizeof(struct varlena *) * natts);

for (int i = 0; i < orcFormatData->numberOfColumns; ++i) {
// Pre-allocate a varlena-shaped scratch buffer for every fixed-length
// column: a uint32_t length header followed by attlen bytes of payload.
// Columns with attlen <= 0 keep the NULL entry left by palloc0 of the array.
bool isFixedLengthType = tup_desc->attrs[i]->attlen > 0;
if (isFixedLengthType) {
  // The cast must name struct varlena, the element type of colFixedLenUDT.
  orcFormatData->colFixedLenUDT[i] = (struct varlena *)palloc0(
      tup_desc->attrs[i]->attlen + sizeof(uint32_t));
}

orcFormatData->colNames[i] = palloc0(NAMEDATALEN);
strcpy(orcFormatData->colNames[i], tup_desc->attrs[i]->attname.data);

@@ -105,8 +114,12 @@ static void initOrcFormatUserData(TupleDesc tup_desc,
}

static freeOrcFormatUserData(OrcFormatData *orcFormatData) {
for (int i = 0; i < orcFormatData->numberOfColumns; ++i)
for (int i = 0; i < orcFormatData->numberOfColumns; ++i) {
pfree(orcFormatData->colNames[i]);
if (orcFormatData->colFixedLenUDT[i])
pfree(orcFormatData->colFixedLenUDT[i]);
}

pfree(orcFormatData->colTimestamp);
pfree(orcFormatData->colValLength);
pfree(orcFormatData->colRawValues);
@@ -235,17 +248,37 @@ static void convertAndFillIntoOrcFormatData(OrcFormatData *orcFormatData,
int *date = (int *)(&(values[i]));
*date += POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE;
orcFormatData->colRawValues[i] = (char *)(&(values[i]));
} else if (dataType == HAWQ_TYPE_TEXT || dataType == HAWQ_TYPE_BPCHAR ||
dataType == HAWQ_TYPE_VARCHAR) {
struct varlena *data = PG_DETOAST_DATUM(values[i]);
orcFormatData->colRawValues[i] = (char *)data;
} else if (dataType == HAWQ_TYPE_BYTE) {
orcFormatData->colRawValues[i] = (char *)PG_DETOAST_DATUM(values[i]);
} else if (dataType == HAWQ_TYPE_NUMERIC) {
Numeric num = DatumGetNumeric(values[i]);
orcFormatData->colRawValues[i] = (char *)num;
if (NUMERIC_IS_NAN(num))
nulls[i] = true;
if (NUMERIC_IS_NAN(num)) nulls[i] = true;
} else {
// Check whether values[i] is fixed length udt.
bool isFixedLengthType = tupleDesc->attrs[i]->attlen > 0 ? true : false;
bool isPassByVal = tupleDesc->attrs[i]->attbyval;
if (isFixedLengthType) {
uint32_t dataLen = tupleDesc->attrs[i]->attlen;
uint32_t totalLen = dataLen + sizeof(uint32_t);

uint32_t tmpLen = __builtin_bswap32(totalLen);
char *lenArr = (char *)(&tmpLen);
memcpy(orcFormatData->colFixedLenUDT[i]->vl_len_, lenArr,
sizeof(uint32_t));

if (isPassByVal) { // pass by val
char *data = (char *)(&values[i]);
memcpy(orcFormatData->colFixedLenUDT[i]->vl_dat, data, dataLen);
orcFormatData->colRawValues[i] =
(char *)(orcFormatData->colFixedLenUDT[i]);
} else { // pass by pointer
char *data = (char *)(values[i]);
memcpy(orcFormatData->colFixedLenUDT[i]->vl_dat, data, dataLen);
orcFormatData->colRawValues[i] =
(char *)(orcFormatData->colFixedLenUDT[i]);
}
} else {
orcFormatData->colRawValues[i] = (char *)PG_DETOAST_DATUM(values[i]);
}
}
}
}
@@ -402,51 +435,68 @@ void orcReadNext(OrcScanDescData *scanData, TupleTableSlot *slot) {
continue;

switch (tupleDesc->attrs[i]->atttypid) {
case HAWQ_TYPE_BOOL: {
values[i] = BoolGetDatum(*(bool *)(orcFormatData->colRawValues[i]));
break;
}
case HAWQ_TYPE_INT2: {
values[i] = Int16GetDatum(*(int16_t *)(orcFormatData->colRawValues[i]));
break;
}
case HAWQ_TYPE_INT4: {
values[i] = Int32GetDatum(*(int32_t *)(orcFormatData->colRawValues[i]));
break;
}
case HAWQ_TYPE_INT8:
case HAWQ_TYPE_TIME:
case HAWQ_TYPE_TIMESTAMP:
case HAWQ_TYPE_TIMESTAMPTZ: {
values[i] = Int64GetDatum(*(int64_t *)(orcFormatData->colRawValues[i]));
break;
}
case HAWQ_TYPE_FLOAT4: {
values[i] = Float4GetDatum(*(float *)(orcFormatData->colRawValues[i]));
break;
}
case HAWQ_TYPE_FLOAT8: {
values[i] = Float8GetDatum(*(double *)(orcFormatData->colRawValues[i]));
break;
}
case HAWQ_TYPE_VARCHAR:
case HAWQ_TYPE_TEXT:
case HAWQ_TYPE_BPCHAR:
case HAWQ_TYPE_BYTE:
case HAWQ_TYPE_NUMERIC: {
SET_VARSIZE((struct varlena *)(orcFormatData->colRawValues[i]),
orcFormatData->colValLength[i]);
values[i] = PointerGetDatum(orcFormatData->colRawValues[i]);
break;
}
case HAWQ_TYPE_DATE: {
values[i] = Int32GetDatum(*(int32_t *)(orcFormatData->colRawValues[i]) -
POSTGRES_EPOCH_JDATE + UNIX_EPOCH_JDATE);
break;
}
default: {
break;
}
case HAWQ_TYPE_BOOL: {
values[i] = BoolGetDatum(*(bool *)(orcFormatData->colRawValues[i]));
break;
}
case HAWQ_TYPE_INT2: {
values[i] =
Int16GetDatum(*(int16_t *)(orcFormatData->colRawValues[i]));
break;
}
case HAWQ_TYPE_INT4: {
values[i] =
Int32GetDatum(*(int32_t *)(orcFormatData->colRawValues[i]));
break;
}
case HAWQ_TYPE_INT8:
case HAWQ_TYPE_TIME:
case HAWQ_TYPE_TIMESTAMP:
case HAWQ_TYPE_TIMESTAMPTZ: {
values[i] =
Int64GetDatum(*(int64_t *)(orcFormatData->colRawValues[i]));
break;
}
case HAWQ_TYPE_FLOAT4: {
values[i] =
Float4GetDatum(*(float *)(orcFormatData->colRawValues[i]));
break;
}
case HAWQ_TYPE_FLOAT8: {
values[i] =
Float8GetDatum(*(double *)(orcFormatData->colRawValues[i]));
break;
}
case HAWQ_TYPE_DATE: {
values[i] =
Int32GetDatum(*(int32_t *)(orcFormatData->colRawValues[i]) -
POSTGRES_EPOCH_JDATE + UNIX_EPOCH_JDATE);
break;
}
default: {
// Check whether value[i] is fixed length udt.
bool isFixedLengthType =
tupleDesc->attrs[i]->attlen > 0 ? true : false;
bool isPassByVal = tupleDesc->attrs[i]->attbyval;
if (isFixedLengthType) {
if (isPassByVal) { // pass by val
struct varlena *var =
(struct varlena *)(orcFormatData->colRawValues[i]);
uint32 valLen = *(uint32 *)(var->vl_len_);
memcpy((void *)&values[i], var->vl_dat, valLen);
} else { // pass by pointer
SET_VARSIZE((struct varlena *)(orcFormatData->colRawValues[i]),
orcFormatData->colValLength[i]);
values[i] = PointerGetDatum(orcFormatData->colRawValues[i] +
sizeof(uint32_t));
}
} else {
SET_VARSIZE((struct varlena *)(orcFormatData->colRawValues[i]),
orcFormatData->colValLength[i]);
values[i] = PointerGetDatum(orcFormatData->colRawValues[i]);
}
break;
}
}
}
TupSetVirtualTupleNValid(slot, slot->tts_tupleDescriptor->natts);
@@ -1,28 +1,29 @@
/*-------------------------------------------------------------------------
*
* hawq_type_mapping.c
* Definitions for hawq type mapping function
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*-------------------------------------------------------------------------
*/
*
* hawq_type_mapping.c
* Definitions for hawq type mapping function
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*-------------------------------------------------------------------------
*/

#include "catalog/pg_magic_oid.h"
#include "utils/hawq_type_mapping.h"
#include "miscadmin.h"

@@ -129,10 +130,13 @@ int32_t map_hawq_type_to_common_plan(int32_t hawqTypeID) {
case HAWQ_TYPE_POLYGON:
case HAWQ_TYPE_CIRCLE:
default:
return type_is_rowtype(hawqTypeID)
? (STRUCTEXID)
: (type_is_basetype(hawqTypeID) ? IOBASETYPEID
: INVALIDTYPEID);
if (HAEQ_TYPE_UDT(hawqTypeID))
return BINARYID;
else
return type_is_rowtype(hawqTypeID)
? (STRUCTEXID)
: (type_is_basetype(hawqTypeID) ? IOBASETYPEID
: INVALIDTYPEID);
}
}

@@ -189,7 +193,6 @@ bool checkORCUnsupportedDataType(int32_t hawqTypeID) {
case HAWQ_TYPE_INT2:
case HAWQ_TYPE_INT4:
case HAWQ_TYPE_INT8:
case HAWQ_TYPE_TID:
case HAWQ_TYPE_FLOAT4:
case HAWQ_TYPE_FLOAT8:
case HAWQ_TYPE_TEXT:
@@ -211,6 +214,6 @@ bool checkORCUnsupportedDataType(int32_t hawqTypeID) {
case HAWQ_TYPE_UNKNOWN:
return false;
default:
return true;
return !HAEQ_TYPE_UDT(hawqTypeID);
}
}
@@ -81,6 +81,8 @@

#define HAWQ_TYPE_UNKNOWN 705

/*
 * Evaluates to true when type OID x is greater than FirstNormalObjectId;
 * callers use this to recognise user-defined types.
 * The argument is parenthesized so that expressions built from
 * lower-precedence operators (?:, &, |) are compared as a whole.
 * NOTE(review): the comparison is strict, so an OID equal to
 * FirstNormalObjectId is not classified as a UDT -- confirm this is intended.
 */
#define HAEQ_TYPE_UDT(x) ((x) > FirstNormalObjectId)

extern int32_t map_hawq_type_to_common_plan(int32_t hawqTypeID);

// Returns true if the given HAWQ type is unsupported.

0 comments on commit c82cf9f

Please sign in to comment.