@@ -1025,6 +1025,8 @@ void ClusterInfo::loadPlan() {
1025
1025
decltype (_plannedCollections) newCollections;
1026
1026
decltype (_shards) newShards;
1027
1027
decltype (_shardServers) newShardServers;
1028
+ decltype (_shardToShardGroupLeader) newShardToShardGroupLeader;
1029
+ decltype (_shardGroups) newShardGroups;
1028
1030
decltype (_shardToName) newShardToName;
1029
1031
decltype (_dbAnalyzersRevision) newDbAnalyzersRevision;
1030
1032
decltype (_newStuffByDatabase) newStuffByDatabase;
@@ -1041,6 +1043,8 @@ void ClusterInfo::loadPlan() {
1041
1043
newCollections = _plannedCollections;
1042
1044
newShards = _shards;
1043
1045
newShardServers = _shardServers;
1046
+ newShardToShardGroupLeader = _shardToShardGroupLeader;
1047
+ newShardGroups = _shardGroups;
1044
1048
newShardToName = _shardToName;
1045
1049
newDbAnalyzersRevision = _dbAnalyzersRevision;
1046
1050
newStuffByDatabase = _newStuffByDatabase;
@@ -1097,6 +1101,8 @@ void ClusterInfo::loadPlan() {
1097
1101
newShards.erase (shardName);
1098
1102
newShardServers.erase (shardName);
1099
1103
newShardToName.erase (shardName);
1104
+ newShardToShardGroupLeader.erase (shardName);
1105
+ newShardGroups.erase (shardName);
1100
1106
}
1101
1107
}
1102
1108
}
@@ -1449,6 +1455,10 @@ void ClusterInfo::loadPlan() {
1449
1455
newShards.erase (shardId);
1450
1456
newShardServers.erase (shardId);
1451
1457
newShardToName.erase (shardId);
1458
+ // Erase the shard ID unconditionally: a shard that is itself a group
1459
+ // leader has no entry in this map, and erasing a missing key is a no-op.
1460
+ newShardToShardGroupLeader.erase (shardId);
1461
+ newShardGroups.erase (shardId);
1452
1462
}
1453
1463
collectionsPath.pop_back ();
1454
1464
}
@@ -1549,6 +1559,59 @@ void ClusterInfo::loadPlan() {
1549
1559
continue ;
1550
1560
}
1551
1561
}
1562
+ // With the first pass over this database's collections done, iterate
1563
+ // over them a second time to build the shard groups:
1564
+ for (auto const & colPair : *databaseCollections) {
1565
+ if (colPair.first == colPair.second .collection ->name ()) {
1566
+ // Every collection shows up once with its ID and once with its name.
1567
+ // We only want it once, so we only take it when we see the ID, not
1568
+ // the name as key:
1569
+ continue ;
1570
+ }
1571
+ auto const & groupLeader =
1572
+ colPair.second .collection ->distributeShardsLike ();
1573
+ if (!groupLeader.empty ()) {
1574
+ auto groupLeaderCol = newShards.find (groupLeader);
1575
+ if (groupLeaderCol != newShards.end ()) {
1576
+ auto col = newShards.find (
1577
+ std::to_string (colPair.second .collection ->id ().id ()));
1578
+ if (col != newShards.end ()) {
1579
+ if (col->second ->size () == 0 ) {
1580
+ // Can happen for smart edge collections. But in this case we
1581
+ // can ignore the collection.
1582
+ continue ;
1583
+ }
1584
+ TRI_ASSERT (groupLeaderCol->second ->size () == col->second ->size ());
1585
+ for (size_t i = 0 ; i < col->second ->size (); ++i) {
1586
+ newShardToShardGroupLeader.try_emplace (
1587
+ col->second ->at (i), groupLeaderCol->second ->at (i));
1588
+ auto it = newShardGroups.find (groupLeaderCol->second ->at (i));
1589
+ if (it == newShardGroups.end ()) {
1590
+ // Need to create a new list:
1591
+ auto list = std::make_shared<std::vector<ShardID>>();
1592
+ list->reserve (2 );
1593
+ // group leader as well as member:
1594
+ list->emplace_back (groupLeaderCol->second ->at (i));
1595
+ list->emplace_back (col->second ->at (i));
1596
+ newShardGroups.try_emplace (groupLeaderCol->second ->at (i),
1597
+ std::move (list));
1598
+ } else {
1599
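+ // Group already exists: append this collection's shard to its list. NOTE(review): newShardGroups was copied from _shardGroups, so this vector may still be shared with the live map unless its entry was erased above - confirm.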
1600
+ it->second ->push_back (col->second ->at (i));
1601
+ }
1602
+ }
1603
+ } else {
1604
+ LOG_TOPIC (" 12f32" , WARN, Logger::CLUSTER)
1605
+ << " loadPlan: Strange, could not find collection: "
1606
+ << colPair.second .collection ->name ();
1607
+ }
1608
+ } else {
1609
+ LOG_TOPIC (" 22312" , WARN, Logger::CLUSTER)
1610
+ << " loadPlan: Strange, could not find proto collection: "
1611
+ << groupLeader;
1612
+ }
1613
+ }
1614
+ }
1552
1615
newCollections.insert_or_assign (databaseName,
1553
1616
std::move (databaseCollections));
1554
1617
}
@@ -1649,6 +1712,8 @@ void ClusterInfo::loadPlan() {
1649
1712
_plannedCollections.swap (newCollections);
1650
1713
_shards.swap (newShards);
1651
1714
_shardServers.swap (newShardServers);
1715
+ _shardToShardGroupLeader.swap (newShardToShardGroupLeader);
1716
+ _shardGroups.swap (newShardGroups);
1652
1717
_shardToName.swap (newShardToName);
1653
1718
}
1654
1719
@@ -6064,21 +6129,41 @@ void ClusterInfo::setFailedServers(
6064
6129
#ifdef ARANGODB_USE_GOOGLE_TESTS
6065
6130
// Setter compiled only under ARANGODB_USE_GOOGLE_TESTS (see the enclosing
// #ifdef): replaces _servers with the given map while holding the write
// locker on _serversProt.lock. The map is taken by value and moved into
// place, so the caller's copy is consumed.
// The -/+ pair below only renames the locker variable (readLocker ->
// writeLocker); the macro used, WRITE_LOCKER, is the same on both sides.
void ClusterInfo::setServers (
6066
6131
containers::FlatHashMap<ServerID, std::string> servers) {
6067
- WRITE_LOCKER (readLocker , _serversProt.lock );
6132
+ WRITE_LOCKER (writeLocker , _serversProt.lock );
6068
6133
_servers = std::move (servers);
6069
6134
}
6070
6135
6071
6136
// Setter compiled only under ARANGODB_USE_GOOGLE_TESTS (see the enclosing
// #ifdef): replaces _serverAliases with the given map while holding the
// write locker on _serversProt.lock. The map is taken by value and moved
// into place.
// The -/+ pair below only renames the locker variable (readLocker ->
// writeLocker); the macro used, WRITE_LOCKER, is the same on both sides.
void ClusterInfo::setServerAliases (
6072
6137
containers::FlatHashMap<ServerID, std::string> aliases) {
6073
- WRITE_LOCKER (readLocker , _serversProt.lock );
6138
+ WRITE_LOCKER (writeLocker , _serversProt.lock );
6074
6139
_serverAliases = std::move (aliases);
6075
6140
}
6076
6141
6077
6142
// Setter compiled only under ARANGODB_USE_GOOGLE_TESTS (see the enclosing
// #ifdef): replaces _serverAdvertisedEndpoints with the given map while
// holding the write locker on _serversProt.lock. The map is taken by value
// and moved into place.
// The -/+ pair below only renames the locker variable (readLocker ->
// writeLocker); the macro used, WRITE_LOCKER, is the same on both sides.
void ClusterInfo::setServerAdvertisedEndpoints (
6078
6143
containers::FlatHashMap<ServerID, std::string> advertisedEndpoints) {
6079
- WRITE_LOCKER (readLocker , _serversProt.lock );
6144
+ WRITE_LOCKER (writeLocker , _serversProt.lock );
6080
6145
_serverAdvertisedEndpoints = std::move (advertisedEndpoints);
6081
6146
}
6147
+
6148
// Setter compiled only under ARANGODB_USE_GOOGLE_TESTS (see the enclosing
// #ifdef): replaces _shardToShardGroupLeader with the given map while
// holding the write locker on _planProt.lock. The map is taken by value and
// moved into place.
// In loadPlan this map is filled with (member shard -> group leader shard)
// pairs, so a test presumably supplies data of the same shape.
+ void ClusterInfo::setShardToShardGroupLeader (
6149
+ containers::FlatHashMap<ShardID, ShardID> shardToShardGroupLeader) {
6150
+ WRITE_LOCKER (writeLocker, _planProt.lock );
6151
+ _shardToShardGroupLeader = std::move (shardToShardGroupLeader);
6152
+ }
6153
+
6154
// Setter compiled only under ARANGODB_USE_GOOGLE_TESTS (see the enclosing
// #ifdef): replaces _shardGroups with the given map while holding the write
// locker on _planProt.lock. The map is taken by value and moved into place.
// In loadPlan each entry is keyed by the group leader shard and its list
// starts with that leader shard, followed by the member shards; a test
// presumably supplies data of the same shape.
+ void ClusterInfo::setShardGroups (
6155
+ containers::FlatHashMap<ShardID, std::shared_ptr<std::vector<ShardID>>>
6156
+ shardGroups) {
6157
+ WRITE_LOCKER (writeLocker, _planProt.lock );
6158
+ _shardGroups = std::move (shardGroups);
6159
+ }
6160
+
6161
// Setter compiled only under ARANGODB_USE_GOOGLE_TESTS (see the enclosing
// #ifdef): replaces _shardIds (shard ID -> shared list of server IDs) with
// the given map. The map is taken by value and moved into place.
// Note the lock: this setter holds the write locker on _currentProt.lock,
// whereas setShardToShardGroupLeader and setShardGroups use _planProt.lock.
+ void ClusterInfo::setShardIds (
6162
+ containers::FlatHashMap<ShardID, std::shared_ptr<std::vector<ServerID>>>
6163
+ shardIds) {
6164
+ WRITE_LOCKER (writeLocker, _currentProt.lock );
6165
+ _shardIds = std::move (shardIds);
6166
+ }
6082
6167
#endif
6083
6168
6084
6169
bool ClusterInfo::serverExists (std::string_view serverId) const noexcept {
@@ -6945,6 +7030,26 @@ VPackBuilder ClusterInfo::toVelocyPack() {
6945
7030
}
6946
7031
}
6947
7032
}
7033
+ dump.add (VPackValue (" shardToShardGroupLeader" ));
7034
+ {
7035
+ VPackObjectBuilder d (&dump);
7036
+ for (auto const & s : _shardToShardGroupLeader) {
7037
+ dump.add (s.first , VPackValue (s.second ));
7038
+ }
7039
+ }
7040
+ dump.add (VPackValue (" shardGroups" ));
7041
+ {
7042
+ VPackObjectBuilder d (&dump);
7043
+ for (auto const & s : _shardGroups) {
7044
+ dump.add (VPackValue (s.first ));
7045
+ {
7046
+ VPackArrayBuilder d2 (&dump);
7047
+ for (auto const & ss : *s.second ) {
7048
+ dump.add (VPackValue (ss));
7049
+ }
7050
+ }
7051
+ }
7052
+ }
6948
7053
dump.add (VPackValue (" shards" ));
6949
7054
{
6950
7055
VPackObjectBuilder d (&dump);
0 commit comments