diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index 54a65f7802af59..eeb563786121ef 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -1643,6 +1643,8 @@ DEFINE_mBool(enable_parquet_page_index, "true"); DEFINE_mBool(ignore_not_found_file_in_external_table, "true"); +DEFINE_mBool(ignore_not_found_segment, "true"); + DEFINE_mBool(enable_hdfs_mem_limiter, "true"); DEFINE_mInt16(topn_agg_limit_multiplier, "2"); diff --git a/be/src/common/config.h b/be/src/common/config.h index 68a5f6dce1f8b3..5fa90ff08e6590 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -1704,6 +1704,11 @@ DECLARE_mBool(enable_parquet_page_index); // Default is true, if set to false, the not found file will result in query failure. DECLARE_mBool(ignore_not_found_file_in_external_table); +// Whether to skip segment files that fail to load with NOT_FOUND or IO_ERROR in native olap tables. +// Default is true. When a segment file is missing or hits an IO error, +// the query/load skips the failing segment (its rows are omitted and only a WARNING is logged) instead of returning an error. +DECLARE_mBool(ignore_not_found_segment); + DECLARE_mBool(enable_hdfs_mem_limiter); // Define how many percent data in hashtable bigger than limit diff --git a/be/src/storage/rowset/beta_rowset.cpp b/be/src/storage/rowset/beta_rowset.cpp index e94430731b17da..65d227a524bac3 100644 --- a/be/src/storage/rowset/beta_rowset.cpp +++ b/be/src/storage/rowset/beta_rowset.cpp @@ -251,7 +251,15 @@ Status BetaRowset::load_segments(int64_t seg_id_begin, int64_t seg_id_end, int64_t seg_id = seg_id_begin; while (seg_id < seg_id_end) { std::shared_ptr segment; - RETURN_IF_ERROR(load_segment(seg_id, nullptr, &segment)); + auto st = load_segment(seg_id, nullptr, &segment); + if ((st.is() || st.is()) && + config::ignore_not_found_segment) { + LOG(WARNING) << "segment io error, skip it. 
rowset_id=" << rowset_id() + << ", seg_id=" << seg_id << ", status=" << st; + seg_id++; + continue; + } + RETURN_IF_ERROR(st); segments->push_back(std::move(segment)); seg_id++; } @@ -260,6 +268,12 @@ Status BetaRowset::load_segments(int64_t seg_id_begin, int64_t seg_id_end, Status BetaRowset::load_segment(int64_t seg_id, OlapReaderStatistics* stats, segment_v2::SegmentSharedPtr* segment) { + DBUG_EXECUTE_IF("BetaRowset::load_segment.return_not_found", { + return Status::Error("injected segment not found, seg_id={}", seg_id); + }); + DBUG_EXECUTE_IF("BetaRowset::load_segment.return_io_error", { + return Status::Error("injected segment io error, seg_id={}", seg_id); + }); auto fs = _rowset_meta->fs(); if (!fs) { return Status::Error("get fs failed"); diff --git a/be/src/storage/segment/lazy_init_segment_iterator.cpp b/be/src/storage/segment/lazy_init_segment_iterator.cpp index 723233d4cbc210..77fc0b3f495656 100644 --- a/be/src/storage/segment/lazy_init_segment_iterator.cpp +++ b/be/src/storage/segment/lazy_init_segment_iterator.cpp @@ -17,6 +17,7 @@ #include "storage/segment/lazy_init_segment_iterator.h" +#include "storage/rowset/beta_rowset.h" #include "storage/segment/segment_loader.h" namespace doris::segment_v2 { @@ -40,8 +41,16 @@ Status LazyInitSegmentIterator::init(const StorageReadOptions& opts) { std::shared_ptr segment; { SegmentCacheHandle segment_cache_handle; - RETURN_IF_ERROR(SegmentLoader::instance()->load_segment( - _rowset, _segment_id, &segment_cache_handle, _should_use_cache, false, opts.stats)); + auto st = SegmentLoader::instance()->load_segment( + _rowset, _segment_id, &segment_cache_handle, _should_use_cache, false, opts.stats); + if ((st.is() || st.is()) && + config::ignore_not_found_segment) { + LOG(WARNING) << "segment io error, skip it. 
rowset_id=" << _rowset->rowset_id() + << ", seg_id=" << _segment_id << ", status=" << st; + // _inner_iterator remains nullptr, next_batch() will return EOF + return Status::OK(); + } + RETURN_IF_ERROR(st); const auto& tmp_segments = segment_cache_handle.get_segments(); segment = tmp_segments[0]; } diff --git a/be/src/storage/segment/lazy_init_segment_iterator.h b/be/src/storage/segment/lazy_init_segment_iterator.h index 147480e0e85886..434596a93fe961 100644 --- a/be/src/storage/segment/lazy_init_segment_iterator.h +++ b/be/src/storage/segment/lazy_init_segment_iterator.h @@ -40,7 +40,9 @@ class LazyInitSegmentIterator : public RowwiseIterator { Status next_batch(Block* block) override { if (UNLIKELY(_need_lazy_init)) { RETURN_IF_ERROR(init(_read_options)); - DCHECK(_inner_iterator != nullptr); + } + if (_inner_iterator == nullptr) { + return Status::EndOfFile("segment not found, skipped"); } return _inner_iterator->next_batch(block); @@ -49,6 +51,9 @@ class LazyInitSegmentIterator : public RowwiseIterator { const Schema& schema() const override { return *_schema; } Status current_block_row_locations(std::vector* locations) override { + if (_inner_iterator == nullptr) { + return Status::EndOfFile("no segment loaded"); + } return _inner_iterator->current_block_row_locations(locations); } diff --git a/be/src/storage/segment/segment_loader.cpp b/be/src/storage/segment/segment_loader.cpp index 0028980c57b6a5..0ea1a7738afe80 100644 --- a/be/src/storage/segment/segment_loader.cpp +++ b/be/src/storage/segment/segment_loader.cpp @@ -91,8 +91,15 @@ Status SegmentLoader::load_segments(const BetaRowsetSharedPtr& rowset, return Status::OK(); } for (int64_t i = 0; i < rowset->num_segments(); i++) { - RETURN_IF_ERROR(load_segment(rowset, i, cache_handle, use_cache, need_load_pk_index_and_bf, - index_load_stats)); + auto st = load_segment(rowset, i, cache_handle, use_cache, need_load_pk_index_and_bf, + index_load_stats); + if ((st.is() || st.is()) && + 
config::ignore_not_found_segment) { + LOG(WARNING) << "segment io error, skip it. rowset_id=" << rowset->rowset_id() + << ", seg_id=" << i << ", status=" << st; + continue; + } + RETURN_IF_ERROR(st); } cache_handle->set_inited(); return Status::OK(); diff --git a/be/test/storage/segment/ignore_not_found_segment_test.cpp b/be/test/storage/segment/ignore_not_found_segment_test.cpp new file mode 100644 index 00000000000000..808cb47a88a6cc --- /dev/null +++ b/be/test/storage/segment/ignore_not_found_segment_test.cpp @@ -0,0 +1,309 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include + +#include + +#include "common/config.h" +#include "common/status.h" +#include "json2pb/json_to_pb.h" +#include "runtime/exec_env.h" +#include "storage/rowset/beta_rowset.h" +#include "storage/segment/lazy_init_segment_iterator.h" +#include "storage/segment/segment_loader.h" +#include "util/debug_points.h" + +namespace doris { + +class IgnoreNotFoundSegmentTest : public testing::Test { +protected: + void SetUp() override { + _saved_ignore = config::ignore_not_found_segment; + _saved_debug_points = config::enable_debug_points; + config::enable_debug_points = true; + + // Set up a SegmentLoader for LazyInitSegmentIterator tests + _saved_segment_loader = ExecEnv::GetInstance()->segment_loader(); + _segment_loader = new SegmentLoader(1024 * 1024, 100); + ExecEnv::GetInstance()->set_segment_loader(_segment_loader); + } + + void TearDown() override { + DebugPoints::instance()->remove("BetaRowset::load_segment.return_not_found"); + DebugPoints::instance()->remove("BetaRowset::load_segment.return_io_error"); + config::ignore_not_found_segment = _saved_ignore; + config::enable_debug_points = _saved_debug_points; + + ExecEnv::GetInstance()->set_segment_loader(_saved_segment_loader); + delete _segment_loader; + _segment_loader = nullptr; + } + + BetaRowsetSharedPtr create_rowset(int num_segments) { + auto schema = std::make_shared(); + TabletColumn col; + col.set_name("c1"); + col.set_unique_id(0); + col.set_type(FieldType::OLAP_FIELD_TYPE_INT); + col.set_length(4); + col.set_is_key(true); + col.set_is_nullable(false); + schema->append_column(col); + schema->_keys_type = DUP_KEYS; + + auto rsm = std::make_shared(); + std::string json = R"({ + "rowset_id": 540081, + "tablet_id": 10001, + "partition_id": 10000, + "tablet_schema_hash": 567997577, + "rowset_type": "BETA_ROWSET", + "rowset_state": "VISIBLE", + "empty": false + })"; + RowsetMetaPB pb; + EXPECT_TRUE(json2pb::JsonToProtoMessage(json, &pb)); + pb.set_start_version(0); + pb.set_end_version(1); + 
pb.set_num_segments(num_segments); + rsm->init_from_pb(pb); + rsm->set_tablet_schema(schema); + + return std::make_shared(schema, rsm, ""); + } + + bool _saved_ignore = true; + bool _saved_debug_points = false; + SegmentLoader* _saved_segment_loader = nullptr; + SegmentLoader* _segment_loader = nullptr; +}; + +// Test: BetaRowset::load_segments skips NOT_FOUND segments when config enabled +TEST_F(IgnoreNotFoundSegmentTest, BetaRowsetLoadSegmentsSkipsNotFound) { + config::ignore_not_found_segment = true; + auto rowset = create_rowset(3); + + DebugPoints::instance()->add("BetaRowset::load_segment.return_not_found"); + + std::vector segments; + auto st = rowset->load_segments(&segments); + // All segments are "not found" but should be skipped, resulting in OK with empty segments + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(0, segments.size()); +} + +// Test: BetaRowset::load_segments fails on NOT_FOUND when config disabled +TEST_F(IgnoreNotFoundSegmentTest, BetaRowsetLoadSegmentsFailsWhenConfigDisabled) { + config::ignore_not_found_segment = false; + auto rowset = create_rowset(3); + + DebugPoints::instance()->add("BetaRowset::load_segment.return_not_found"); + + std::vector segments; + auto st = rowset->load_segments(&segments); + ASSERT_TRUE(st.is()) << st; + ASSERT_EQ(0, segments.size()); +} + +// Test: BetaRowset::load_segments with range skips NOT_FOUND +TEST_F(IgnoreNotFoundSegmentTest, BetaRowsetLoadSegmentsRangeSkipsNotFound) { + config::ignore_not_found_segment = true; + auto rowset = create_rowset(5); + + DebugPoints::instance()->add("BetaRowset::load_segment.return_not_found"); + + std::vector segments; + auto st = rowset->load_segments(1, 4, &segments); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(0, segments.size()); +} + +// Test: SegmentLoader::load_segments skips NOT_FOUND segments when config enabled +TEST_F(IgnoreNotFoundSegmentTest, SegmentLoaderLoadSegmentsSkipsNotFound) { + config::ignore_not_found_segment = true; + auto rowset = create_rowset(3); + + 
DebugPoints::instance()->add("BetaRowset::load_segment.return_not_found"); + + // Create a SegmentLoader with a small cache + SegmentLoader loader(1024 * 1024, 100); + SegmentCacheHandle handle; + auto st = loader.load_segments(rowset, &handle, false); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(0, handle.get_segments().size()); + ASSERT_TRUE(handle.is_inited()); +} + +// Test: SegmentLoader::load_segments fails when config disabled +TEST_F(IgnoreNotFoundSegmentTest, SegmentLoaderLoadSegmentsFailsWhenConfigDisabled) { + config::ignore_not_found_segment = false; + auto rowset = create_rowset(3); + + DebugPoints::instance()->add("BetaRowset::load_segment.return_not_found"); + + SegmentLoader loader(1024 * 1024, 100); + SegmentCacheHandle handle; + auto st = loader.load_segments(rowset, &handle, false); + ASSERT_TRUE(st.is()) << st; +} + +// Test: SegmentLoader::load_segment (single) returns NOT_FOUND directly +// (single-segment load does not skip; it's the caller's responsibility) +TEST_F(IgnoreNotFoundSegmentTest, SegmentLoaderLoadSingleSegmentReturnsNotFound) { + auto rowset = create_rowset(1); + + DebugPoints::instance()->add("BetaRowset::load_segment.return_not_found"); + + SegmentLoader loader(1024 * 1024, 100); + SegmentCacheHandle handle; + auto st = loader.load_segment(rowset, 0, &handle, false); + ASSERT_TRUE(st.is()) << st; +} + +// Test: LazyInitSegmentIterator returns EOF when segment not found +// (explicit init path - simulates VUnionIterator calling init() before next_batch()) +TEST_F(IgnoreNotFoundSegmentTest, LazyInitIteratorReturnsEofOnNotFound) { + config::ignore_not_found_segment = true; + auto rowset = create_rowset(1); + + DebugPoints::instance()->add("BetaRowset::load_segment.return_not_found"); + + auto schema = std::make_shared(rowset->tablet_schema()); + StorageReadOptions opts; + opts.tablet_schema = rowset->tablet_schema(); + + auto iter = + std::make_unique(rowset, 0, false, schema, opts); + + // Explicit init should succeed (segment 
skipped, inner iterator is null) + auto st = iter->init(opts); + ASSERT_TRUE(st.ok()) << st; + + // next_batch should return EOF since inner iterator is null. + // This is the critical path: _need_lazy_init is already false because init() was called, + // so the null check must be outside the UNLIKELY branch. + Block block; + st = iter->next_batch(&block); + ASSERT_TRUE(st.is()) << st; +} + +// Test: LazyInitSegmentIterator fails when config disabled +TEST_F(IgnoreNotFoundSegmentTest, LazyInitIteratorFailsWhenConfigDisabled) { + config::ignore_not_found_segment = false; + auto rowset = create_rowset(1); + + DebugPoints::instance()->add("BetaRowset::load_segment.return_not_found"); + + auto schema = std::make_shared(rowset->tablet_schema()); + StorageReadOptions opts; + opts.tablet_schema = rowset->tablet_schema(); + + auto iter = + std::make_unique(rowset, 0, false, schema, opts); + + // init should fail with NOT_FOUND + auto st = iter->init(opts); + ASSERT_TRUE(st.is()) << st; +} + +// Test: LazyInitSegmentIterator next_batch path with lazy init (not pre-inited) +// (lazy path - simulates the case where next_batch() triggers init internally) +TEST_F(IgnoreNotFoundSegmentTest, LazyInitIteratorNextBatchLazyPath) { + config::ignore_not_found_segment = true; + auto rowset = create_rowset(1); + + DebugPoints::instance()->add("BetaRowset::load_segment.return_not_found"); + + auto schema = std::make_shared(rowset->tablet_schema()); + StorageReadOptions opts; + opts.tablet_schema = rowset->tablet_schema(); + + auto iter = + std::make_unique(rowset, 0, false, schema, opts); + + // Don't call init() explicitly - let next_batch trigger lazy init + Block block; + auto st = iter->next_batch(&block); + ASSERT_TRUE(st.is()) << st; +} + +// ==================== IO_ERROR tests ==================== + +// Test: BetaRowset::load_segments skips IO_ERROR segments when config enabled +TEST_F(IgnoreNotFoundSegmentTest, BetaRowsetLoadSegmentsSkipsIOError) { + 
config::ignore_not_found_segment = true; + auto rowset = create_rowset(3); + + DebugPoints::instance()->add("BetaRowset::load_segment.return_io_error"); + + std::vector segments; + auto st = rowset->load_segments(&segments); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(0, segments.size()); +} + +// Test: BetaRowset::load_segments fails on IO_ERROR when config disabled +TEST_F(IgnoreNotFoundSegmentTest, BetaRowsetLoadSegmentsFailsIOErrorWhenConfigDisabled) { + config::ignore_not_found_segment = false; + auto rowset = create_rowset(3); + + DebugPoints::instance()->add("BetaRowset::load_segment.return_io_error"); + + std::vector segments; + auto st = rowset->load_segments(&segments); + ASSERT_TRUE(st.is()) << st; +} + +// Test: SegmentLoader::load_segments skips IO_ERROR segments +TEST_F(IgnoreNotFoundSegmentTest, SegmentLoaderLoadSegmentsSkipsIOError) { + config::ignore_not_found_segment = true; + auto rowset = create_rowset(3); + + DebugPoints::instance()->add("BetaRowset::load_segment.return_io_error"); + + SegmentLoader loader(1024 * 1024, 100); + SegmentCacheHandle handle; + auto st = loader.load_segments(rowset, &handle, false); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(0, handle.get_segments().size()); + ASSERT_TRUE(handle.is_inited()); +} + +// Test: LazyInitSegmentIterator returns EOF on IO_ERROR +TEST_F(IgnoreNotFoundSegmentTest, LazyInitIteratorReturnsEofOnIOError) { + config::ignore_not_found_segment = true; + auto rowset = create_rowset(1); + + DebugPoints::instance()->add("BetaRowset::load_segment.return_io_error"); + + auto schema = std::make_shared(rowset->tablet_schema()); + StorageReadOptions opts; + opts.tablet_schema = rowset->tablet_schema(); + + auto iter = + std::make_unique(rowset, 0, false, schema, opts); + + auto st = iter->init(opts); + ASSERT_TRUE(st.ok()) << st; + + Block block; + st = iter->next_batch(&block); + ASSERT_TRUE(st.is()) << st; +} + +} // namespace doris diff --git 
a/regression-test/data/fault_injection_p0/test_ignore_not_found_segment.out b/regression-test/data/fault_injection_p0/test_ignore_not_found_segment.out new file mode 100644 index 00000000000000..5640edfdfa13dc --- /dev/null +++ b/regression-test/data/fault_injection_p0/test_ignore_not_found_segment.out @@ -0,0 +1,10 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ignore_enabled -- +0 + +-- !ignore_io_error -- +0 + +-- !recovery -- +6 + diff --git a/regression-test/suites/fault_injection_p0/test_ignore_not_found_segment.groovy b/regression-test/suites/fault_injection_p0/test_ignore_not_found_segment.groovy new file mode 100644 index 00000000000000..854e7864ecf486 --- /dev/null +++ b/regression-test/suites/fault_injection_p0/test_ignore_not_found_segment.groovy @@ -0,0 +1,113 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_ignore_not_found_segment", "nonConcurrent") { + def tableName = "test_ignore_not_found_segment_tbl" + + def backendId_to_backendIP = [:] + def backendId_to_backendHttpPort = [:] + getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort) + + def set_be_config = { key, value -> + for (String backend_id: backendId_to_backendIP.keySet()) { + def (code, out, err) = update_be_config(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), key, value) + logger.info("set be config ${key}=${value}, code: ${code}, out: ${out}, err: ${err}") + } + } + + sql "DROP TABLE IF EXISTS ${tableName}" + sql """ + CREATE TABLE ${tableName} ( + `k1` int NOT NULL, + `v1` string NOT NULL + ) ENGINE=OLAP + DUPLICATE KEY(`k1`) + DISTRIBUTED BY HASH(`k1`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "disable_auto_compaction" = "true" + ); + """ + + // Insert data across multiple segments (each INSERT creates a new segment) + sql "INSERT INTO ${tableName} VALUES (1, 'aaa'), (2, 'bbb');" + sql "INSERT INTO ${tableName} VALUES (3, 'ccc'), (4, 'ddd');" + sql "INSERT INTO ${tableName} VALUES (5, 'eee'), (6, 'fff');" + + // NOTE: Do NOT query the table before fault injection tests. + // Any query would populate the segment LRU cache, and disable_segment_cache + // only prevents new insertions — it does not block lookups from existing cache. + // Keep disable_segment_cache=true throughout ALL fault injection tests to prevent + // segments from being cached between test cases. + + set_be_config.call("disable_segment_cache", "true") + try { + // Test 1: With ignore_not_found_segment=true (default), injecting NOT_FOUND + // should return 0 rows since all segments fail to load and are skipped. 
+ try { + set_be_config.call("ignore_not_found_segment", "true") + GetDebugPoint().enableDebugPointForAllBEs("BetaRowset::load_segment.return_not_found") + + qt_ignore_enabled "SELECT count(*) FROM ${tableName}" + } finally { + GetDebugPoint().disableDebugPointForAllBEs("BetaRowset::load_segment.return_not_found") + } + + // Test 2: With ignore_not_found_segment=false, injecting NOT_FOUND should cause query failure + try { + set_be_config.call("ignore_not_found_segment", "false") + GetDebugPoint().enableDebugPointForAllBEs("BetaRowset::load_segment.return_not_found") + + test { + sql "SELECT count(*) FROM ${tableName}" + exception "NOT_FOUND" + } + } finally { + GetDebugPoint().disableDebugPointForAllBEs("BetaRowset::load_segment.return_not_found") + } + + // Test 3: With ignore_not_found_segment=true, injecting IO_ERROR + // should return 0 rows since all segments fail to load and are skipped. + try { + set_be_config.call("ignore_not_found_segment", "true") + GetDebugPoint().enableDebugPointForAllBEs("BetaRowset::load_segment.return_io_error") + + qt_ignore_io_error "SELECT count(*) FROM ${tableName}" + } finally { + GetDebugPoint().disableDebugPointForAllBEs("BetaRowset::load_segment.return_io_error") + } + + // Test 4: With ignore_not_found_segment=false, injecting IO_ERROR should cause query failure + try { + set_be_config.call("ignore_not_found_segment", "false") + GetDebugPoint().enableDebugPointForAllBEs("BetaRowset::load_segment.return_io_error") + + test { + sql "SELECT count(*) FROM ${tableName}" + exception "IO_ERROR" + } + } finally { + GetDebugPoint().disableDebugPointForAllBEs("BetaRowset::load_segment.return_io_error") + } + } finally { + set_be_config.call("disable_segment_cache", "false") + set_be_config.call("ignore_not_found_segment", "true") + } + + // Test 5: After clearing the debug point, data should be fully accessible again + qt_recovery "SELECT count(*) FROM ${tableName}" +}