Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@

3.12.5.4 (2025-09-19)
---------------------

* Fix ES-2678: In the upgrade scenario, we disable the ClusterFeature.
We should therefore check before accessing clusterInfo() whether we're
in an upgrade scenario. Added those checks in iresearch.

3.12.5.3 (2025-09-09)
---------------------

Expand Down
12 changes: 8 additions & 4 deletions arangod/Cluster/ClusterFeature.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1149,12 +1149,16 @@ std::shared_ptr<HeartbeatThread> ClusterFeature::heartbeatThread() {

ClusterInfo& ClusterFeature::clusterInfo() {
if (!_clusterInfo) {
if (server().isStopping()) {
THROW_ARANGO_EXCEPTION(TRI_ERROR_SHUTTING_DOWN);
} else {
ADB_PROD_ASSERT(_clusterInfo != nullptr)
if (!server().isStopping()) {
TRI_ASSERT(_clusterInfo != nullptr)
<< "_clusterInfo is null, but server is not shutting down";

LOG_TOPIC("325b6", ERR, arangodb::Logger::CLUSTER)
<< "_clusterInfo is null, but server is not shutting down";
// log crash dump feature
CrashHandler::logBacktrace();
}
THROW_ARANGO_EXCEPTION(TRI_ERROR_SHUTTING_DOWN);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Cluster Info Null Error Masked

The clusterInfo() method now unconditionally throws TRI_ERROR_SHUTTING_DOWN when _clusterInfo is null. This is problematic because if the server is not shutting down, it previously triggered a fatal assertion. The new behavior replaces this critical error with a misleading TRI_ERROR_SHUTTING_DOWN exception, obscuring the actual problem of _clusterInfo being unavailable during normal operation.

Fix in Cursor Fix in Web

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is OK, because we maintain complete backwards compatibilitiy.

}
return *_clusterInfo;
}
Expand Down
28 changes: 18 additions & 10 deletions arangod/IResearch/IResearchAnalyzerFeature.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -494,7 +494,8 @@ Result visitAnalyzers(TRI_vocbase_t& vocbase,
auto& server = vocbase.server();

std::vector<std::string> coords;
if (server.hasFeature<ClusterFeature>()) {
if (server.hasFeature<ClusterFeature>() &&
server.getFeature<ClusterFeature>().isEnabled()) {
coords = server.getFeature<ClusterFeature>()
.clusterInfo()
.getCurrentCoordinators();
Expand All @@ -508,6 +509,7 @@ Result visitAnalyzers(TRI_vocbase_t& vocbase,
TRI_IF_FAILURE("CheckDBWhenSingleShardAndForceOneShardChange") {
THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
}

auto& clusterInfo = server.getFeature<ClusterFeature>().clusterInfo();
auto collection = clusterInfo.getCollectionNT(
vocbase.name(), arangodb::StaticStrings::AnalyzersCollection);
Expand Down Expand Up @@ -775,7 +777,8 @@ bool analyzerInUse(ArangodServer& server, std::string_view dbName,
AnalyzerModificationTransaction::Ptr createAnalyzerModificationTransaction(
ArangodServer& server, std::string_view vocbase) {
if (ServerState::instance()->isCoordinator() && !vocbase.empty()) {
TRI_ASSERT(server.hasFeature<ClusterFeature>());
TRI_ASSERT(server.hasFeature<ClusterFeature>() &&
server.getFeature<ClusterFeature>().isEnabled());
auto& engine = server.getFeature<ClusterFeature>().clusterInfo();
return std::make_unique<AnalyzerModificationTransaction>(vocbase, &engine,
false);
Expand Down Expand Up @@ -1111,13 +1114,16 @@ IResearchAnalyzerFeature::IResearchAnalyzerFeature(Server& server)
return;
}

auto cleanupTrans = this->server()
.getFeature<ClusterFeature>()
.clusterInfo()
.createAnalyzersCleanupTrans();
if (cleanupTrans) {
if (cleanupTrans->start().ok()) {
cleanupTrans->commit();
if (this->server().hasFeature<ClusterFeature>() &&
this->server().getFeature<ClusterFeature>().isEnabled()) {
auto cleanupTrans = this->server()
.getFeature<ClusterFeature>()
.clusterInfo()
.createAnalyzersCleanupTrans();
if (cleanupTrans) {
if (cleanupTrans->start().ok()) {
cleanupTrans->commit();
}
}
}

Expand Down Expand Up @@ -2463,7 +2469,9 @@ AnalyzersRevision::Ptr IResearchAnalyzerFeature::getAnalyzersRevision(
const TRI_vocbase_t& vocbase, bool forceLoadPlan /* = false */) const {
if (ServerState::instance()->isRunningInCluster()) {
auto const& server = vocbase.server();
if (server.hasFeature<ClusterFeature>()) {

if (server.hasFeature<ClusterFeature>() &&
server.getFeature<ClusterFeature>().isEnabled()) {
auto ptr = server.getFeature<ClusterFeature>()
.clusterInfo()
.getAnalyzersRevision(vocbase.name(), forceLoadPlan);
Expand Down
30 changes: 25 additions & 5 deletions arangod/IResearch/IResearchDataStore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -651,18 +651,27 @@ IResearchDataStore::IResearchDataStore(
_asyncSelf(std::make_shared<AsyncLinkHandle>(nullptr)),
_maintenanceState(std::make_shared<MaintenanceState>()) {
// initialize transaction callback

#ifdef USE_ENTERPRISE
if (!collection.isAStub() && _asyncFeature->columnsCacheOnlyLeaders()) {
auto& ci = server.getFeature<ClusterFeature>().clusterInfo();
auto maybeShardID = ShardID::shardIdFromString(collection.name());
if (maybeShardID.fail()) {
// Illegal shard name, could be collection name
_useSearchCache = false;
} else {
TRI_ASSERT(maybeShardID.ok());
auto r = ci.getShardLeadership(ServerState::instance()->getId(),
maybeShardID.get());
_useSearchCache = r == ClusterInfo::ShardLeadership::kLeader;
if (server.hasFeature<ClusterFeature>() &&
server.getFeature<ClusterFeature>().isEnabled()) {
TRI_ASSERT(maybeShardID.ok());

auto& ci = server.getFeature<ClusterFeature>().clusterInfo();
auto r = ci.getShardLeadership(ServerState::instance()->getId(),
maybeShardID.get());
_useSearchCache = r == ClusterInfo::ShardLeadership::kLeader;
} else {
// During an upgrade, the ClusterFeature is disabled.
// Therefore we disable search cache.
_useSearchCache = false;
}
}
}
#endif
Expand Down Expand Up @@ -904,6 +913,17 @@ Result IResearchDataStore::commitUnsafeImpl(
return false;
}
auto& collection = index().collection();

if (!collection.vocbase().server().hasFeature<ClusterFeature>() ||
!collection.vocbase()
.server()
.getFeature<ClusterFeature>()
.isEnabled()) {
// clusterInfo() is only availbale when ClusterFeature is enabled
// which is not the case during a local upgrade.
return false;
}

auto& ci = collection.vocbase()
.server()
.getFeature<ClusterFeature>()
Expand Down
4 changes: 3 additions & 1 deletion arangod/IResearch/IResearchFeature.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,9 @@ uint32_t computeThreadsCount(uint32_t threads, uint32_t threadsLimit,
Result upgradeArangoSearchLinkCollectionName(
TRI_vocbase_t& vocbase, velocypack::Slice /*upgradeParams*/) {
using application_features::ApplicationServer;
if (!ServerState::instance()->isDBServer()) {
if (!ServerState::instance()->isDBServer() ||
!vocbase.server().hasFeature<ClusterFeature>() ||
!vocbase.server().getFeature<ClusterFeature>().isEnabled()) {
return {}; // not applicable for other ServerState roles
}
auto& selector = vocbase.server().getFeature<EngineSelectorFeature>();
Expand Down
9 changes: 8 additions & 1 deletion arangod/IResearch/IResearchLink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,14 @@ Result IResearchLink::initSingleServer(bool& pathExists,

Result IResearchLink::initCoordinator(InitCallback const& init) {
auto& vocbase = index().collection().vocbase();
auto& ci = vocbase.server().getFeature<ClusterFeature>().clusterInfo();
auto& cf = vocbase.server().getFeature<ClusterFeature>();
if (!cf.isEnabled()) {
LOG_TOPIC("9be20", DEBUG, TOPIC)
<< "Skipped link '" << index().id().id()
<< "' maybe due to disabled cluster features.";
return {};
}
auto& ci = cf.clusterInfo();
std::shared_ptr<IResearchViewCoordinator> view;
if (auto r = toView(ci.getView(vocbase.name(), _viewGuid), view); !view) {
return r;
Expand Down
9 changes: 6 additions & 3 deletions arangod/IResearch/IResearchViewCoordinator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ struct IResearchViewCoordinator::ViewFactory final : arangodb::ViewFactory {
Result create(LogicalView::ptr& view, TRI_vocbase_t& vocbase,
VPackSlice definition, bool isUserRequest) const final {
auto& server = vocbase.server();
if (!server.hasFeature<ClusterFeature>()) {
if (!server.hasFeature<ClusterFeature>() ||
!server.getFeature<ClusterFeature>().isEnabled()) {
return {TRI_ERROR_INTERNAL,
absl::StrCat("failure to find 'ClusterInfo' instance while "
"creating arangosearch View in database '",
Expand Down Expand Up @@ -341,7 +342,8 @@ Result IResearchViewCoordinator::properties(velocypack::Slice slice,
bool isUserRequest,
bool partialUpdate) {
auto& server = vocbase().server();
if (!server.hasFeature<ClusterFeature>()) {
if (!server.hasFeature<ClusterFeature>() ||
!server.getFeature<ClusterFeature>().isEnabled()) {
return {
TRI_ERROR_INTERNAL,
absl::StrCat(
Expand Down Expand Up @@ -447,7 +449,8 @@ Result IResearchViewCoordinator::properties(velocypack::Slice slice,

Result IResearchViewCoordinator::dropImpl() {
auto& server = vocbase().server();
if (!server.hasFeature<ClusterFeature>()) {
if (!server.hasFeature<ClusterFeature>() ||
!server.getFeature<ClusterFeature>().isEnabled()) {
return {
TRI_ERROR_INTERNAL,
absl::StrCat(
Expand Down