Skip to content

Commit

Permalink
5
Browse files Browse the repository at this point in the history
  • Loading branch information
morningman committed Aug 22, 2023
1 parent 1563068 commit 8370e1a
Show file tree
Hide file tree
Showing 7 changed files with 26 additions and 5 deletions.
2 changes: 2 additions & 0 deletions be/src/common/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1059,6 +1059,8 @@ DEFINE_mBool(enable_merge_on_write_correctness_check, "true");
// The secure path with user files, used in the `local` table function.
DEFINE_mString(user_files_secure_path, "${DORIS_HOME}");

DEFINE_mString(default_lz4_codec, "block");

#ifdef BE_TEST
// test s3
DEFINE_String(test_s3_resource, "resource");
Expand Down
8 changes: 8 additions & 0 deletions be/src/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -1116,6 +1116,14 @@ DECLARE_mBool(enable_merge_on_write_correctness_check);
// The secure path with user files, used in the `local` table function.
DECLARE_mString(user_files_secure_path);

// The default lz4 codec. Options: frame, block
// In previous, we use lz4 "frame" as the default codec
// but the hadoop use lz4 block to write data
// So in v2.0, change the default codec to "block"
// So that we can read lz4 data from hive table by default.
// TODO: find a way to auto detect this.
DECLARE_mString(default_lz4_codec);

#ifdef BE_TEST
// test s3
DECLARE_String(test_s3_resource);
Expand Down
1 change: 1 addition & 0 deletions be/src/service/internal_service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,7 @@ void PInternalServiceImpl::fetch_table_schema(google::protobuf::RpcController* c
case TFileFormatType::FORMAT_CSV_GZ:
case TFileFormatType::FORMAT_CSV_BZ2:
case TFileFormatType::FORMAT_CSV_LZ4FRAME:
case TFileFormatType::FORMAT_CSV_LZ4BLOCK:
case TFileFormatType::FORMAT_CSV_LZOP:
case TFileFormatType::FORMAT_CSV_DEFLATE: {
// file_slots is no use
Expand Down
12 changes: 9 additions & 3 deletions be/src/util/load_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,13 @@ void LoadUtil::parse_format(const std::string& format_str, const std::string& co
*format_type = TFileFormatType::FORMAT_CSV_BZ2;
*compress_type = TFileCompressType::BZ2;
} else if (iequal(compress_type_str, "LZ4")) {
*format_type = TFileFormatType::FORMAT_CSV_LZ4FRAME;
*compress_type = TFileCompressType::LZ4FRAME;
if (config::default_lz4_codec == "block") {
*format_type = TFileFormatType::FORMAT_CSV_LZ4BLOCK;
*compress_type = TFileCompressType::LZ4BLOCK;
} else {
*format_type = TFileFormatType::FORMAT_CSV_LZ4FRAME;
*compress_type = TFileCompressType::LZ4FRAME;
}
} else if (iequal(compress_type_str, "LZOP")) {
*format_type = TFileFormatType::FORMAT_CSV_LZOP;
*compress_type = TFileCompressType::LZO;
Expand All @@ -72,6 +77,7 @@ bool LoadUtil::is_format_support_streaming(TFileFormatType::type format) {
case TFileFormatType::FORMAT_CSV_DEFLATE:
case TFileFormatType::FORMAT_CSV_GZ:
case TFileFormatType::FORMAT_CSV_LZ4FRAME:
case TFileFormatType::FORMAT_CSV_LZ4BLOCK:
case TFileFormatType::FORMAT_CSV_LZO:
case TFileFormatType::FORMAT_CSV_LZOP:
case TFileFormatType::FORMAT_JSON:
Expand All @@ -81,4 +87,4 @@ bool LoadUtil::is_format_support_streaming(TFileFormatType::type format) {
}
return false;
}
} // namespace doris
} // namespace doris
6 changes: 4 additions & 2 deletions be/src/vec/exec/format/csv/csv_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,8 @@ Status CsvReader::init_reader(bool is_load) {
[[fallthrough]];
case TFileFormatType::FORMAT_CSV_LZ4FRAME:
[[fallthrough]];
case TFileFormatType::FORMAT_CSV_LZ4BLOCK:
[[fallthrough]];
case TFileFormatType::FORMAT_CSV_LZOP:
[[fallthrough]];
case TFileFormatType::FORMAT_CSV_DEFLATE:
Expand Down Expand Up @@ -481,7 +483,7 @@ Status CsvReader::_create_decompressor() {
compress_type = CompressType::BZIP2;
break;
case TFileCompressType::LZ4FRAME:
compress_type = CompressType::LZ4FRAME;
compress_type = config::default_lz4_codec == "block" ? CompressType::LZ4BLOCK : CompressType::LZ4FRAME;
break;
case TFileCompressType::LZ4BLOCK:
compress_type = CompressType::LZ4BLOCK;
Expand All @@ -506,7 +508,7 @@ Status CsvReader::_create_decompressor() {
compress_type = CompressType::BZIP2;
break;
case TFileFormatType::FORMAT_CSV_LZ4FRAME:
compress_type = CompressType::LZ4FRAME;
compress_type = config::default_lz4_codec == "block" ? CompressType::LZ4BLOCK : CompressType::LZ4FRAME;
break;
case TFileFormatType::FORMAT_CSV_LZ4BLOCK:
compress_type = CompressType::LZ4BLOCK;
Expand Down
1 change: 1 addition & 0 deletions be/src/vec/exec/scan/vfile_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -758,6 +758,7 @@ Status VFileScanner::_get_next_reader() {
case TFileFormatType::FORMAT_CSV_GZ:
case TFileFormatType::FORMAT_CSV_BZ2:
case TFileFormatType::FORMAT_CSV_LZ4FRAME:
case TFileFormatType::FORMAT_CSV_LZ4BLOCK:
case TFileFormatType::FORMAT_CSV_LZOP:
case TFileFormatType::FORMAT_CSV_DEFLATE:
case TFileFormatType::FORMAT_PROTO: {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -599,6 +599,7 @@ public static boolean isCsvFormat(TFileFormatType fileFormatType) {
|| fileFormatType == TFileFormatType.FORMAT_CSV_DEFLATE
|| fileFormatType == TFileFormatType.FORMAT_CSV_GZ
|| fileFormatType == TFileFormatType.FORMAT_CSV_LZ4FRAME
|| fileFormatType == TFileFormatType.FORMAT_CSV_LZ4BLOCK
|| fileFormatType == TFileFormatType.FORMAT_CSV_LZO
|| fileFormatType == TFileFormatType.FORMAT_CSV_LZOP
|| fileFormatType == TFileFormatType.FORMAT_CSV_PLAIN;
Expand Down

0 comments on commit 8370e1a

Please sign in to comment.