Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/workflows/neug-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,10 @@ jobs:
sudo chmod -R 777 /etc/security/*
echo "* soft nofile 1048576" | tee -a /etc/security/limits.conf

- name: Clean the tool directory for self-hosted runner
run: |
rm -rf /__w/_tool/Python

- name: Setup Python
uses: actions/setup-python@v5
with:
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ option(ENABLE_THREAD_SANITIZER "Enable thread sanitizer." FALSE)
option(ENABLE_UBSAN "Enable undefined behavior sanitizer." FALSE)
option(ENABLE_RUNTIME_CHECKS "Enable runtime coherency checks (e.g. asserts)" FALSE)
option(ENABLE_LTO "Enable Link-Time Optimization" FALSE)
option(AUTO_UPDATE_GRAMMAR "Automatically regenerate C++ grammar files on change." TRUE)
option(AUTO_UPDATE_GRAMMAR "Automatically regenerate C++ grammar files on change." FALSE)
Comment thread
shirly121 marked this conversation as resolved.
option(BUILD_EXTENSIONS "Semicolon-separated list of extensions to build." "")
option(BUILD_SINGLE_FILE_HEADER "Build single file header. Requires Python >= 3.9." FALSE)
option(BUILD_TEST "Build C++ tests." TRUE)
Expand Down
5 changes: 4 additions & 1 deletion doc/source/cypher_manual/data_types.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,13 @@ The following table showcases all data types supported by NeuG and their differe

### String Types

Strings are currently supported only through the VARCHAR type. You can specify a maximum character length using the `VARCHAR(max_length)` syntax. The default value of `max_length` is 256, and the maximum limit is 65536.
Alternatively, you can use `STRING` to declare a string type directly; `STRING` is equivalent to `VARCHAR(256)`, i.e., a VARCHAR type with the default maximum length of 256 characters.

#### VARCHAR
- **Description**: Variable-length character string with UTF-8 encoding
- **Query Example**: `RETURN 'Hello World' AS string_value;`
- **Length**: Variable, limited by system constraints, default is `65536`
- **Length**: Variable, limited by system constraints, default is `256`

### Temporal Types

Expand Down
28 changes: 27 additions & 1 deletion include/neug/compiler/common/types/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,7 @@ enum class PhysicalTypeID : uint8_t {
class ExtraTypeInfo;
class StructField;
class StructTypeInfo;
class StringTypeInfo;

enum class TypeCategory : uint8_t { INTERNAL = 0, UDT = 1 };

Expand Down Expand Up @@ -340,6 +341,13 @@ class LogicalType {
return ret;
}

// Default max_length of the VARCHAR type, used when max_length is not
// specified explicitly. constexpr so the value is usable in constant
// expressions (static_assert, array bounds, ...).
static constexpr size_t getDefaultStringMaxLen() noexcept { return 256; }

// Upper limit accepted for the max_length of a VARCHAR type. constexpr so the
// value is usable in constant expressions.
static constexpr size_t getMaxStringMaxLen() noexcept { return 65536; }

static LogicalType BOOL() { return LogicalType(LogicalTypeID::BOOL); }
static LogicalType HASH() { return LogicalType(LogicalTypeID::UINT64); }
static LogicalType INT64() { return LogicalType(LogicalTypeID::INT64); }
Expand Down Expand Up @@ -375,7 +383,8 @@ class LogicalType {
return LogicalType(LogicalTypeID::INTERNAL_ID);
}
static LogicalType SERIAL() { return LogicalType(LogicalTypeID::SERIAL); }
static LogicalType STRING() { return LogicalType(LogicalTypeID::STRING); }
static LogicalType STRING();
static LogicalType STRING(size_t max_length);
static LogicalType BLOB() { return LogicalType(LogicalTypeID::BLOB); }
static LogicalType UUID() { return LogicalType(LogicalTypeID::UUID); }
static LogicalType POINTER() { return LogicalType(LogicalTypeID::POINTER); }
Expand Down Expand Up @@ -448,6 +457,23 @@ class NEUG_API ExtraTypeInfo {
virtual void serializeInternal(Serializer& serializer) const = 0;
};

// Extra type information carried by a STRING logical type: the maximum length
// declared for the type.
class NEUG_API StringTypeInfo : public ExtraTypeInfo {
 public:
  explicit StringTypeInfo(size_t max_length) : max_length(max_length) {}

  // Returns the maximum length stored in this type info.
  size_t getMaxLength() const { return max_length; }

  // Always false for string type info.
  bool containsAny() const override { return false; }

  bool operator==(const ExtraTypeInfo& other) const override;

  // Returns a new StringTypeInfo holding the same max_length.
  std::unique_ptr<ExtraTypeInfo> copy() const override;

 private:
  // `override` already implies `virtual`; the keyword is not repeated.
  void serializeInternal(Serializer& serializer) const override;

  size_t max_length;
};

class NEUG_API UDTTypeInfo : public ExtraTypeInfo {
public:
explicit UDTTypeInfo(std::string typeName) : typeName{std::move(typeName)} {}
Expand Down
1 change: 0 additions & 1 deletion include/neug/compiler/gopt/g_constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ namespace neug {
class Constants {
public:
static inline uint64_t MAX_UPPER_BOUND = INT32_MAX;
static inline uint64_t VARCHAR_MAX_LENGTH = 65536;
static inline uint64_t ARRAY_MAX_LENGTH = 256;
static inline neug::transaction::Transaction DEFAULT_TRANSACTION =
neug::transaction::Transaction(
Expand Down
28 changes: 24 additions & 4 deletions include/neug/compiler/gopt/g_type_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,16 @@ class GTypeUtils {
auto stringType = node["string"];
if (stringType) {
// denote varchar
if (stringType["var_char"] || stringType["long_text"]) {
return neug::common::LogicalType(neug::common::LogicalTypeID::STRING);
if (stringType["var_char"]) {
auto varChar = stringType["var_char"];
auto maxLength = varChar["max_length"];
if (maxLength && maxLength.IsScalar()) {
return neug::common::LogicalType::STRING(maxLength.as<uint64_t>());
} else {
return neug::common::LogicalType::STRING();
}
} else if (stringType["long_text"]) {
return neug::common::LogicalType::STRING();
}
}
auto temporalType = node["temporal"];
Expand Down Expand Up @@ -115,8 +123,20 @@ class GTypeUtils {
return YAML_NODE_DT_DOUBLE;
case neug::common::LogicalTypeID::BOOL:
return YAML_NODE_DT_BOOL;
case neug::common::LogicalTypeID::STRING:
return YAML_NODE_STRING_VARCHAR(neug::Constants::VARCHAR_MAX_LENGTH);
case neug::common::LogicalTypeID::STRING: {
size_t maxLen;
auto extraInfo = type.getExtraTypeInfo();
if (extraInfo) {
auto stringTypeInfo =
extraInfo->constPtrCast<neug::common::StringTypeInfo>();
maxLen = stringTypeInfo->getMaxLength();
} else {
maxLen = neug::common::LogicalType::getDefaultStringMaxLen();
}
YAML::Node n;
n["string"]["var_char"]["max_length"] = maxLen;
return n;
}
case neug::common::LogicalTypeID::DATE32:
return YAML_NODE_TEMPORAL_DATE32();
case neug::common::LogicalTypeID::TIMESTAMP64:
Expand Down
4 changes: 2 additions & 2 deletions include/neug/compiler/tools/shell/include/keywords.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#ifndef _keywordList
// clang-format off
#define _keywordList {"ACYCLIC", "ANY", "ADD", "ALL", "ALTER", "AND", "AS", "ASC", "ASCENDING", "ATTACH", "BEGIN", "BY", "CALL", "CASE", "CAST", "CHECKPOINT", "COLUMN", "COMMENT", "COMMIT", "COMMIT_SKIP_CHECKPOINT", "CONTAINS", "COPY", "COUNT", "CREATE", "CYCLE", "DATABASE", "DBTYPE", "DEFAULT", "DELETE", "DESC", "DESCENDING", "DETACH", "DISTINCT", "DROP", "ELSE", "END", "ENDS", "EXISTS", "EXPLAIN", "EXPORT", "EXTENSION", "FALSE", "FROM", "GLOB", "GRAPH", "GROUP", "HEADERS", "HINT", "IMPORT", "IF", "IN", "INCREMENT", "INSTALL", "IS", "JOIN", "KEY", "LIMIT", "LOAD", "LOGICAL", "MACRO", "MATCH", "MAXVALUE", "MERGE", "MINVALUE", "MULTI_JOIN", "NO", "NODE", "NOT", "NONE", "NULL", "ON", "ONLY", "OPTIONAL", "OR", "ORDER", "PRIMARY", "PROFILE", "PROJECT", "READ", "REL", "RENAME", "RETURN", "ROLLBACK", "ROLLBACK_SKIP_CHECKPOINT", "SEQUENCE", "SET", "SHORTEST", "START", "STARTS", "TABLE", "THEN", "TO", "TRAIL", "TRANSACTION", "TRUE", "TYPE", "UNION", "UNWIND", "USE", "WHEN", "WHERE", "WITH", "WRITE", "WSHORTEST", "XOR", "SINGLE", "YIELD"}
#define _keywordListLength 107
#define _keywordList {"ACYCLIC", "ANY", "ADD", "ALL", "ALTER", "AND", "AS", "ASC", "ASCENDING", "ATTACH", "BEGIN", "BY", "CALL", "CASE", "CAST", "CHECKPOINT", "COLUMN", "COMMENT", "COMMIT", "COMMIT_SKIP_CHECKPOINT", "CONTAINS", "COPY", "COUNT", "CREATE", "CYCLE", "DATABASE", "DBTYPE", "DEFAULT", "DELETE", "DESC", "DESCENDING", "DETACH", "DISTINCT", "DROP", "ELSE", "END", "ENDS", "EXISTS", "EXPLAIN", "EXPORT", "EXTENSION", "FROM", "GLOB", "GRAPH", "GROUP", "HEADERS", "HINT", "IMPORT", "IF", "IN", "INCREMENT", "INSTALL", "IS", "JOIN", "KEY", "LIMIT", "LOAD", "LOGICAL", "MACRO", "MATCH", "MAXVALUE", "MERGE", "MINVALUE", "MULTI_JOIN", "NO", "NODE", "NOT", "NONE", "NULL", "ON", "ONLY", "OPTIONAL", "OR", "ORDER", "PRIMARY", "PROFILE", "PROJECT", "READ", "REL", "RENAME", "RETURN", "ROLLBACK", "ROLLBACK_SKIP_CHECKPOINT", "SEQUENCE", "SET", "SHORTEST", "START", "STARTS", "TABLE", "THEN", "TO", "TRAIL", "TRANSACTION", "TYPE", "UNINSTALL", "UNION", "UNWIND", "USE", "WHEN", "WHERE", "WITH", "WRITE", "WSHORTEST", "XOR", "SINGLE", "YIELD"}
#define _keywordListLength 106
Comment thread
shirly121 marked this conversation as resolved.
// clang-format on
#endif
5 changes: 4 additions & 1 deletion include/neug/execution/common/types/value.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ class Value {

static Value STRING(const std::string& str);

// NOTE(review): max_length is a uint16_t, whose largest value is 65535, while
// LogicalType::getMaxStringMaxLen() returns 65536. Passing that limit here
// would wrap to 0 — confirm the intended range or widen the parameter together
// with its definition.
static Value VARCHAR(const std::string& str, uint16_t max_length);

static Value VERTEX(const vertex_t& vertex);

static Value EDGE(const edge_t& edge);
Expand Down Expand Up @@ -630,7 +632,8 @@ bool Value::ApplyComparisonOp(const Value& lhs, const Value& rhs) {
}

Property value_to_property(const Value& value);
Value property_to_value(const Property& property);
Value property_to_value(const Property& property,
const DataType& type = DataType::UNKNOWN);

template <typename T>
Value performCast(const Value& input) {
Expand Down
83 changes: 83 additions & 0 deletions include/neug/storages/column/I_container.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/** Copyright 2020 Alibaba Group Holding Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cstddef>
#include <memory>
#include <string>

/**
 * @brief Interface for data containers with mmap-based storage.
 *
 * IDataContainer defines the contract for all data container implementations,
 * supporting various storage strategies (anonymous mmap, file-backed mmap,
 * etc.)
 *
 * NOTE(review): StorageStrategy and Checkpoint are used below but are not
 * declared by any header this file includes (<cstddef>, <memory>, <string>).
 * The header therefore relies on the including translation unit to declare
 * them first — confirm, or include the declaring header here.
 */
class IDataContainer {
 public:
  virtual ~IDataContainer() = default;

  /**
   * @brief Get pointer to the data region.
   * @return Pointer to the start of the data region.
   */
  virtual void* GetData() = 0;

  /**
   * @brief Get the size of the data region.
   * @return Size of the data region (presumably in bytes — TODO confirm).
   */
  virtual size_t GetDataSize() = 0;

  /**
   * @brief Get the file path (empty for anonymous mappings).
   * @return The path associated with this container.
   */
  virtual std::string GetPath() = 0;

  /**
   * @brief Get the storage strategy used by this container.
   */
  virtual StorageStrategy GetStorageStrategy() const = 0;

  /**
   * @brief Open a file-backed container.
   * @param path Path of the file to open.
   */
  virtual void Open(const std::string& path) = 0;

  /**
   * @brief Synchronize changes to persistent storage.
   */
  virtual void Sync() = 0;

  /**
   * @brief Dump the container contents to a file.
   * @param path Path of the file to write.
   * @note This will close the container after writing.
   */
  virtual void Dump(const std::string& path) = 0;

  /**
   * @brief Close the container and release resources.
   */
  virtual void Close() = 0;

  /**
   * @brief Check if the data has been modified.
   * @return true if the data has been modified.
   */
  virtual bool IsDirty() = 0;

  /**
   * @brief Create a fork (copy) of this container.
   * @param checkpoint Checkpoint passed to the implementation.
   * @param strategy Storage strategy requested for the fork.
   * @return Owning pointer to the newly created container.
   */
  virtual std::unique_ptr<IDataContainer> Fork(Checkpoint& checkpoint,
                                               StorageStrategy strategy) = 0;
};
5 changes: 4 additions & 1 deletion src/compiler/antlr4/Cypher.g4
Original file line number Diff line number Diff line change
Expand Up @@ -174,13 +174,16 @@ nEUG_CreateNodeConstraint : PRIMARY SP KEY SP? '(' SP? oC_PropertyKeyName SP? ')

DECIMAL: ( 'D' | 'd' ) ( 'E' | 'e' ) ( 'C' | 'c' ) ( 'I' | 'i' ) ( 'M' | 'm' ) ( 'A' | 'a' ) ( 'L' | 'l' ) ;

VARCHAR: ('V' | 'v') ('A' | 'a') ('R' | 'r') ('C' | 'c') ('H' | 'h') ('A' | 'a') ('R' | 'r');

nEUG_DataType
: oC_SymbolicName
| nEUG_DataType nEUG_ListIdentifiers
| UNION SP? '(' SP? nEUG_ColumnDefinitions SP? ')'
| oC_SymbolicName SP? '(' SP? nEUG_ColumnDefinitions SP? ')'
| oC_SymbolicName SP? '(' SP? nEUG_DataType SP? ',' SP? nEUG_DataType SP? ')'
| DECIMAL SP? '(' SP? oC_IntegerLiteral SP? ',' SP? oC_IntegerLiteral SP? ')' ;
| DECIMAL SP? '(' SP? oC_IntegerLiteral SP? ',' SP? oC_IntegerLiteral SP? ')'
| VARCHAR SP? '(' SP? oC_IntegerLiteral SP? ')';

nEUG_ListIdentifiers : nEUG_ListIdentifier ( nEUG_ListIdentifier )* ;

Expand Down
61 changes: 61 additions & 0 deletions src/compiler/common/types/types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,20 @@ uint32_t PhysicalTypeUtils::getFixedTypeSize(PhysicalTypeID physicalType) {
}
}

void StringTypeInfo::serializeInternal(Serializer& serializer) const {};
Comment thread
shirly121 marked this conversation as resolved.

// Two StringTypeInfo objects are equal when their max_length matches; any
// other ExtraTypeInfo subtype compares unequal.
bool StringTypeInfo::operator==(const ExtraTypeInfo& other) const {
  const auto* rhs = dynamic_cast<const StringTypeInfo*>(&other);
  return rhs != nullptr && max_length == rhs->max_length;
}
Comment thread
shirly121 marked this conversation as resolved.

// Produces an independent StringTypeInfo carrying the same max_length.
std::unique_ptr<ExtraTypeInfo> StringTypeInfo::copy() const {
  auto duplicate = std::make_unique<StringTypeInfo>(max_length);
  return duplicate;
}

bool DecimalTypeInfo::operator==(const ExtraTypeInfo& other) const {
auto otherDecimalTypeInfo = neug_dynamic_cast<const DecimalTypeInfo*>(&other);
if (otherDecimalTypeInfo) {
Expand Down Expand Up @@ -603,6 +617,11 @@ bool LogicalType::operator==(const LogicalType& other) const {
if (typeID != other.typeID || category != other.category) {
return false;
}
// STRING types that differ only in max_length are treated as equal, so the
// comparison of extraTypeInfo is skipped for them.
if (typeID == LogicalTypeID::STRING) {
return true;
}
if (extraTypeInfo) {
return *extraTypeInfo == *other.extraTypeInfo;
}
Expand Down Expand Up @@ -744,6 +763,8 @@ static LogicalType parseUnionType(const std::string& trimmedStr,
main::ClientContext* context = nullptr);
static LogicalType parseDecimalType(const std::string& trimmedStr);

static LogicalType parseStringType(const std::string& trimmedStr);

bool LogicalType::isBuiltInType(const std::string& str) {
auto trimmedStr = StringUtils::ltrim(StringUtils::rtrim(str));
auto upperDataTypeString = StringUtils::getUpper(trimmedStr);
Expand Down Expand Up @@ -787,6 +808,10 @@ LogicalType LogicalType::convertFromString(const std::string& str,
} else if (upperDataTypeString.starts_with("DECIMAL") ||
upperDataTypeString.starts_with("NUMERIC")) {
type = parseDecimalType(trimmedStr);
} else if (upperDataTypeString == "STRING") {
type = LogicalType::STRING();
} else if (upperDataTypeString.starts_with("VARCHAR")) {
type = parseStringType(trimmedStr);
} else if (tryGetIDFromString(upperDataTypeString, type.typeID)) {
type.physicalType =
LogicalType::getPhysicalType(type.typeID, type.extraTypeInfo);
Expand Down Expand Up @@ -1472,6 +1497,28 @@ LogicalType parseUnionType(const std::string& trimmedStr,
return LogicalType::UNION(parseStructTypeInfo(trimmedStr, context));
}

// Parses a type string of the form VARCHAR(max_length) into a STRING logical
// type carrying that maximum length.
//
// @param trimmedStr the type string, already stripped of surrounding
//        whitespace by the caller.
// @return LogicalType::STRING(max_length).
// @throws a binder exception when the parentheses are missing or out of
//         order, or when max_length is not a positive integer.
LogicalType parseStringType(const std::string& trimmedStr) {
  const auto openPos = trimmedStr.find('(');
  const auto closePos = trimmedStr.find_last_of(')');
  // closePos < openPos (e.g. "VARCHAR)10(") would make the substr length
  // below underflow, so reject it together with the missing-bracket cases.
  if (openPos == std::string::npos || closePos == std::string::npos ||
      closePos < openPos) {
    THROW_BINDER_EXCEPTION(
        "Invalid format for VARCHAR type, should be VARCHAR(max_length). "
        "Given: " +
        trimmedStr);
  }
  const auto maxLenStr = StringUtils::ltrim(StringUtils::rtrim(
      trimmedStr.substr(openPos + 1, closePos - openPos - 1)));
  char* endPtr = nullptr;
  const auto maxLen = std::strtoll(maxLenStr.c_str(), &endPtr, 10);
  // Reject empty input, trailing garbage, and non-positive values. A negative
  // value would otherwise convert to a huge size_t and be silently clamped to
  // the maximum limit.
  if (endPtr == maxLenStr.c_str() || *endPtr != '\0' || maxLen <= 0) {
    THROW_BINDER_EXCEPTION(
        "The max length of string must be a positive integer. Given: " +
        maxLenStr);
  }
  return LogicalType::STRING(static_cast<size_t>(maxLen));
}

LogicalType parseDecimalType(const std::string& trimmedStr) {
auto leftBracketPos = trimmedStr.find_last_of('(');
auto rightBracketPos = trimmedStr.find_last_of(')');
Expand Down Expand Up @@ -1514,6 +1561,20 @@ LogicalType LogicalType::STRUCT(std::vector<StructField>&& fields) {
std::make_unique<StructTypeInfo>(std::move(fields)));
}

// STRING type with the default maximum length.
LogicalType LogicalType::STRING() {
  const size_t defaultLen = getDefaultStringMaxLen();
  return STRING(defaultLen);
}

// STRING type with the given maximum length. A value above
// getMaxStringMaxLen() is logged as a warning and replaced by that limit.
LogicalType LogicalType::STRING(size_t max_length) {
  const size_t upperBound = getMaxStringMaxLen();
  const bool exceedsLimit = max_length > upperBound;
  if (exceedsLimit) {
    LOG(WARNING) << "The max length of string is greater than the maximum "
                    "limit, the maximum limit is "
                 << upperBound;
  }
  const size_t effectiveLen = exceedsLimit ? upperBound : max_length;
  return LogicalType(LogicalTypeID::STRING,
                     std::make_unique<StringTypeInfo>(effectiveLen));
}

LogicalType LogicalType::RECURSIVE_REL(
std::unique_ptr<StructTypeInfo> typeInfo) {
return LogicalType(LogicalTypeID::RECURSIVE_REL, std::move(typeInfo));
Expand Down
14 changes: 13 additions & 1 deletion src/compiler/gopt/g_type_converter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -342,9 +342,21 @@ GPhysicalTypeConverter::convertSimpleLogicalType(
break;
}
case common::LogicalTypeID::STRING: {
auto extraInfo = type.getExtraTypeInfo();
size_t maxLen;
if (!extraInfo) {
LOG(WARNING)
<< "Missing extra type info in string type, use default max length: "
<< common::LogicalType::getDefaultStringMaxLen();
maxLen = common::LogicalType::getDefaultStringMaxLen();
} else {
auto stringTypeInfo =
extraInfo->constPtrCast<neug::common::StringTypeInfo>();
maxLen = stringTypeInfo->getMaxLength();
}
auto strType = std::make_unique<::common::String>();
auto varChar = std::make_unique<::common::String::VarChar>();
varChar->set_max_length(neug::Constants::VARCHAR_MAX_LENGTH);
varChar->set_max_length(maxLen);
strType->set_allocated_var_char(varChar.release());
result->set_allocated_string(strType.release());
break;
Expand Down
Loading
Loading