Skip to content

Commit

Permalink
Merge pull request #2464 from htcondor/HTCONDOR-2443-branch
Browse files Browse the repository at this point in the history
HTCONDOR-2443 Implement the AUTO value of ENABLE_STARTD_DAEMON_AD
  • Loading branch information
ColeBollig committed May 16, 2024
2 parents c458df5 + 67e3daf commit d17f779
Show file tree
Hide file tree
Showing 15 changed files with 328 additions and 70 deletions.
8 changes: 6 additions & 2 deletions src/condor_daemon_client/daemon_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,7 @@ CollectorList::sendUpdates (int cmd, ClassAd * ad1, ClassAd* ad2, bool nonblocki
// advance the sequence numbers for these ads
//
time_t now = time(NULL);
DCCollectorAdSeq * seqgen = adSeq->getAdSeq(*ad1);
if (seqgen) { seqgen->advance(now); }
adSeq->getAdSeq(*ad1).advance(now);

size_t num_collectors = m_list.size();
for (auto& daemon : m_list) {
Expand Down Expand Up @@ -176,6 +175,11 @@ CollectorList::sendUpdates (int cmd, ClassAd * ad1, ClassAd* ad2, bool nonblocki
return success_count;
}

// pass flag down to the individual DCCollector objects
void CollectorList::checkVersionBeforeSendingUpdates(bool check) {
for (auto * dcc : m_list) { if (dcc) dcc->checkVersionBeforeSendingUpdate(check); }
}

QueryResult
CollectorList::query (CondorQuery & cQuery, bool (*callback)(void*, ClassAd *), void* pv, CondorError * errstack) {

Expand Down
2 changes: 2 additions & 0 deletions src/condor_daemon_client/daemon_list.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ class CollectorList {
DCTokenRequester *token_requester = nullptr, const std::string &identity = "",
const std::string authz_name = "");

void checkVersionBeforeSendingUpdates(bool check);

std::vector<DCCollector*>& getList() { return m_list; }

// use this to detach the ad sequence counters before destroying the collector list
Expand Down
72 changes: 43 additions & 29 deletions src/condor_daemon_client/daemon_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,48 +26,50 @@
#include <map>
#include <algorithm>

struct _adtype_and_daemon_type { AdTypes at; daemon_t dt; };

// Map of MyType of an ad to what daemon produces it
// Primary user is python bindings code to initialize the Daemon class
// But it might be generally useful.
// This returns return a std::array at compile time that other
// consteval functions can use as a lookup table
constexpr
std::array<std::pair<const char *, daemon_t>,23>
std::array<std::pair<const char *, _adtype_and_daemon_type>,23>
makeAdTypeToDaemonTable() {
return {{ // yes, there needs to be 2 open braces here...
{ ANY_ADTYPE, DT_ANY },
{ STARTD_SLOT_ADTYPE, DT_STARTD },
{ STARTD_DAEMON_ADTYPE, DT_STARTD },
{ STARTD_OLD_ADTYPE, DT_STARTD },
{ SCHEDD_ADTYPE, DT_SCHEDD },
{ SUBMITTER_ADTYPE, DT_SCHEDD },
{ MASTER_ADTYPE, DT_MASTER },
{ COLLECTOR_ADTYPE, DT_COLLECTOR },
{ NEGOTIATOR_ADTYPE, DT_NEGOTIATOR },
{ ACCOUNTING_ADTYPE, DT_NEGOTIATOR },
{ HAD_ADTYPE, DT_HAD },
{ REPLICATION_ADTYPE, DT_GENERIC }, // Replocation ads go into the generic table in the collector
{ CLUSTER_ADTYPE, DT_CLUSTER },
{ GENERIC_ADTYPE, DT_GENERIC },
{ CREDD_ADTYPE, DT_CREDD },
{ XFER_SERVICE_ADTYPE, DT_TRANSFERD },
{ LEASE_MANAGER_ADTYPE, DT_LEASE_MANAGER },
{ GRID_ADTYPE, DT_GRIDMANAGER },
{ DEFRAG_ADTYPE, DT_GENERIC }, // Defrag ads go into the generic table in the collector
{ JOB_ROUTER_ADTYPE, DT_GENERIC }, // Job_Router ads go into the generic table in the collector
{ JOB_ADTYPE, DT_SCHEDD },
{ JOB_SET_ADTYPE, DT_SCHEDD },
{ OWNER_ADTYPE, DT_SCHEDD },
{ ANY_ADTYPE, { ANY_AD, DT_ANY } },
{ STARTD_SLOT_ADTYPE, { SLOT_AD, DT_STARTD } },
{ STARTD_DAEMON_ADTYPE, { STARTDAEMON_AD, DT_STARTD } },
{ STARTD_OLD_ADTYPE, { STARTD_AD, DT_STARTD } },
{ SCHEDD_ADTYPE, { SCHEDD_AD, DT_SCHEDD } },
{ SUBMITTER_ADTYPE, { SUBMITTOR_AD, DT_SCHEDD } },
{ MASTER_ADTYPE, { MASTER_AD, DT_MASTER } },
{ COLLECTOR_ADTYPE, { COLLECTOR_AD, DT_COLLECTOR } },
{ NEGOTIATOR_ADTYPE, { NEGOTIATOR_AD, DT_NEGOTIATOR } },
{ ACCOUNTING_ADTYPE, { ACCOUNTING_AD, DT_NEGOTIATOR } },
{ HAD_ADTYPE, { HAD_AD, DT_HAD } },
{ REPLICATION_ADTYPE, { GENERIC_AD, DT_GENERIC } }, // Replocation ads go into the generic table in the collector
{ CLUSTER_ADTYPE, { CLUSTER_AD, DT_CLUSTER } },
{ GENERIC_ADTYPE, { GENERIC_AD, DT_GENERIC } },
{ CREDD_ADTYPE, { CREDD_AD, DT_CREDD } },
{ XFER_SERVICE_ADTYPE, { XFER_SERVICE_AD, DT_TRANSFERD } },
{ LEASE_MANAGER_ADTYPE, {LEASE_MANAGER_AD, DT_LEASE_MANAGER } },
{ GRID_ADTYPE, { GRID_AD, DT_GRIDMANAGER } },
{ DEFRAG_ADTYPE, { DEFRAG_AD, DT_GENERIC } }, // Defrag ads go into the generic table in the collector
{ JOB_ROUTER_ADTYPE, { GENERIC_AD, DT_GENERIC } }, // Job_Router ads go into the generic table in the collector
{ JOB_ADTYPE, { NO_AD, DT_SCHEDD } }, // should we have an AdTypes entry for jobs ?
{ JOB_SET_ADTYPE, { NO_AD, DT_SCHEDD } }, // should we have an AdTypes entry for jobs ?
{ OWNER_ADTYPE, { NO_AD, DT_SCHEDD } }, // should we have an AdTypes entry for jobs ?

}};
}

template<size_t N> constexpr
auto sortByFirst(const std::array<std::pair<const char *, daemon_t>, N> &table) {
auto sortByFirst(const std::array<std::pair<const char *, _adtype_and_daemon_type>, N> &table) {
auto sorted = table;
std::sort(sorted.begin(), sorted.end(),
[](const std::pair<const char *, daemon_t> &lhs,
const std::pair<const char *, daemon_t> &rhs) {
[](const std::pair<const char *, _adtype_and_daemon_type> &lhs,
const std::pair<const char *, _adtype_and_daemon_type> &rhs) {
return istring_view(lhs.first) < istring_view(rhs.first);
});
return sorted;
Expand All @@ -78,13 +80,25 @@ AdTypeStringToDaemonType(const char* adtype_string)
{
constexpr static const auto table = sortByFirst(makeAdTypeToDaemonTable());
auto it = std::lower_bound(table.begin(), table.end(), adtype_string,
[](const std::pair<const char *, daemon_t> &p, const char * name) {
[](const std::pair<const char *, _adtype_and_daemon_type> &p, const char * name) {
return istring_view(p.first) < istring_view(name);
});;
if ((it != table.end()) && (istring_view(it->first) == istring_view(adtype_string))) return it->second;
if ((it != table.end()) && (istring_view(it->first) == istring_view(adtype_string))) return it->second.dt;
return DT_NONE;
}

AdTypes
AdTypeStringToAdType(const char* adtype_string)
{
constexpr static const auto table = sortByFirst(makeAdTypeToDaemonTable());
auto it = std::lower_bound(table.begin(), table.end(), adtype_string,
[](const std::pair<const char *, _adtype_and_daemon_type> &p, const char * name) {
return istring_view(p.first) < istring_view(name);
});;
if ((it != table.end()) && (istring_view(it->first) == istring_view(adtype_string))) return it->second.at;
return NO_AD;
}

static const char* daemon_names[] = {
"none",
"any",
Expand Down
3 changes: 3 additions & 0 deletions src/condor_daemon_client/daemon_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ const char* daemonString( daemon_t dt );
daemon_t stringToDaemonType( const char* name );
// convert MyType string from a location ad to the daemon_t enum
daemon_t AdTypeStringToDaemonType( const char* adtype_name );
// convert MyType string to the AdTypes enum
// returns NO_AD for ads that have a valid MyType but no corresponding enum value
AdTypes AdTypeStringToAdType(const char* adtype_name);

// If `d` corresponds to an ad type, sets `a` to that type and return true;
// otherwise, returns false.
Expand Down
70 changes: 60 additions & 10 deletions src/condor_daemon_client/dc_collector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,11 @@ DCCollector::parseTCPInfo( void )
}
}

// do a >= version check
bool DCCollector::checkCachedVersion(int major, int minor, int subminor, bool default_value) {
if (_version.empty()) return default_value;
return CondorVersionInfo(_version.c_str()).built_since_version(major, minor, subminor);
}

bool
DCCollector::sendUpdate( int cmd, ClassAd* ad1, DCCollectorAdSequences& adSeq, ClassAd* ad2, bool nonblocking, StartCommandCallbackType callback_fn, void *miscdata)
Expand All @@ -287,6 +292,23 @@ DCCollector::sendUpdate( int cmd, ClassAd* ad1, DCCollectorAdSequences& adSeq, C
nonblocking = false;
}

// if we have not yet stashed the collector version from the sock into the daemon object do that now.
// the update sock version is set by security negotiatiation, so we won't know it for the first update.
// By the time we *do* know it we are committed to the command int <sigh>. Because of this it is the
// callers responsibility to make sure that the first command over the update sock is safe for
// all versions of the collector.
// The adtype INVALIDATE commands are safe, the adtype UPDATE commands are only safe if adtype
// is one of the adtypes that has been around forever *and* the ad itself is one that the collector
// is willing to store. Thus it is *not* safe to a STARTD daemon ad to be the first update, because
// a pre 23.2 collector will either store it in the slot ad table, or it will close the connection if
// it cannot be hashed as a slot ad.
if (_version.empty() && update_rsock) {
auto * verinfo = update_rsock->get_peer_version();
if (verinfo) { _version = verinfo->get_version_stdstring(); }
dprintf(D_ZKM, "DCCollector::sendUpdate collector %s version was unknown, is now %s\n",
_name.c_str(), _version.c_str());
}

// Add start time & seq # to the ads before we publish 'em
if ( ad1 ) {
ad1->Assign(ATTR_DAEMON_START_TIME, startTime);
Expand All @@ -298,12 +320,33 @@ DCCollector::sendUpdate( int cmd, ClassAd* ad1, DCCollectorAdSequences& adSeq, C
}

if ( ad1 ) {
DCCollectorAdSeq* seqgen = adSeq.getAdSeq(*ad1);
if (seqgen) {
long long seq = seqgen->getSequence();
ad1->Assign(ATTR_UPDATE_SEQUENCE_NUMBER, seq);
if (ad2) { ad2->Assign(ATTR_UPDATE_SEQUENCE_NUMBER, seq); }
auto & seqgen = adSeq.getAdSeq(*ad1);
if (cmd == UPDATE_STARTD_AD && seqgen.getAdType() == STARTDAEMON_AD
&& do_version_check_before_startd_daemon_ad_update) {
// We can't send STARTD daemon ads to collectors that are older than 23.2
// so when we don't know the version, or the collector is older, just fail now
const char * err_reason = nullptr;
if (_version.empty()) {
err_reason = "version is not known";
} else if ( ! CondorVersionInfo(_version.c_str()).built_since_version(23,2,0)) {
err_reason = "version is older than 23.2";
}
if (err_reason) {
std::string err_msg, adname;
ad1->LookupString(ATTR_NAME, adname);
formatstr(err_msg, "Collector %s %s - will not send STARD daemon ad %s",
_name.c_str(), err_reason, adname.c_str());
newError(CA_INVALID_REQUEST, err_msg.c_str());
if (callback_fn) {
(*callback_fn)(false, nullptr, nullptr, "", false, miscdata);
}
dprintf(D_ZKM, "DCCollector::sendUpdate will not send STARTD daemon ad because %s\n", err_reason);
return false;
}
}
long long seq = seqgen.getSequence();
ad1->Assign(ATTR_UPDATE_SEQUENCE_NUMBER, seq);
if (ad2) { ad2->Assign(ATTR_UPDATE_SEQUENCE_NUMBER, seq); }
}

// Prior to 7.2.0, the negotiator depended on the startd
Expand Down Expand Up @@ -383,8 +426,11 @@ DCCollector::finishUpdate( DCCollector *self, Sock* sock, ClassAd* ad1, ClassAd*
// to share submitter secrets.
auto *verinfo = sock->get_peer_version();
bool send_submitter_secrets = false;
if (verinfo && verinfo->built_since_version(8, 9, 3)) {
send_submitter_secrets = true;
if (verinfo) {
if (self && self->_version.empty()) { self->_version = verinfo->get_version_stdstring(); };
if (verinfo->built_since_version(8, 9, 3)) {
send_submitter_secrets = true;
}
}

// If we are advertising to an admin-configured pool, then
Expand Down Expand Up @@ -780,20 +826,24 @@ DCCollector::initDestinationStrings( void )
//

// Get a sequence number class for this classad, creating it if needed.
DCCollectorAdSeq* DCCollectorAdSequences::getAdSeq(const ClassAd & ad)
DCCollectorAdSeq& DCCollectorAdSequences::getAdSeq(const ClassAd & ad)
{
AdTypes mytype = NO_AD;
std::string name, attr;
ad.LookupString( ATTR_NAME, name );
ad.LookupString( ATTR_MY_TYPE, attr );
name += "\n"; name += attr;
if ( ! attr.empty()) { mytype = AdTypeStringToAdType(attr.c_str()); }
ad.LookupString( ATTR_MACHINE, attr );
name += "\n"; name += attr;

DCCollectorAdSeqMap::iterator it = seqs.find(name);
if (it != seqs.end()) {
return &(it->second);
return it->second;
}
return &(seqs[name]);
auto & seq = seqs[name];
seq.setAdType(mytype);
return seq;
}

DCCollector::~DCCollector( void )
Expand Down
17 changes: 13 additions & 4 deletions src/condor_daemon_client/dc_collector.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
//
class DCCollectorAdSeq {
public:
DCCollectorAdSeq() : sequence(0), last_advance(0) {}
DCCollectorAdSeq() = default;
long long getSequence() const { return sequence; }
time_t lastAdvance() const { return last_advance; }
long long advance(time_t now) {
Expand All @@ -44,15 +44,18 @@ class DCCollectorAdSeq {
++sequence;
return sequence;
}
AdTypes getAdType() const { return adtype; }
void setAdType(AdTypes adt) { adtype = adt; }
protected:
long long sequence; // current sequence number
time_t last_advance; // last time advance was called (for garbage collection if we ever want to do that)
long long sequence{0}; // current sequence number
time_t last_advance{0}; // last time advance was called (for garbage collection if we ever want to do that)
AdTypes adtype{NO_AD};
};

typedef std::map<std::string, DCCollectorAdSeq> DCCollectorAdSeqMap;
class DCCollectorAdSequences {
public:
DCCollectorAdSeq* getAdSeq(const ClassAd & ad);
DCCollectorAdSeq& getAdSeq(const ClassAd & ad);
private:
DCCollectorAdSeqMap seqs;
};
Expand Down Expand Up @@ -103,6 +106,11 @@ class DCCollector : public Daemon {
void blacklistMonitorQueryFinished( bool success );

bool useTCPForUpdates() const { return use_tcp; }
void checkVersionBeforeSendingUpdate(bool check) { do_version_check_before_startd_daemon_ad_update = check; }
bool checkVersionBeforeSendingUpdate() { return do_version_check_before_startd_daemon_ad_update; }
// do a version check against the cached version number, return default value if version is not known
bool checkCachedVersion(int major, int minor, int subminor, bool default_value);
bool hasVersion() { return ! _version.empty(); }

time_t getStartTime() const { return startTime; }
time_t getReconfigTime() const { return reconfigTime; }
Expand All @@ -124,6 +132,7 @@ class DCCollector : public Daemon {

bool use_tcp;
bool use_nonblocking_update;
bool do_version_check_before_startd_daemon_ad_update{true};
UpdateType up_type;

std::deque<class UpdateData*> pending_update_list;
Expand Down
14 changes: 14 additions & 0 deletions src/condor_daemon_core.V6/daemon_core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10817,6 +10817,20 @@ DaemonCore::initCollectorList() {
delete m_collector_list;
}
m_collector_list = CollectorList::create(NULL, adSeq);

// This param has legal values of TRUE, FALSE, and AUTO
// but we only need to check for TRUE here because TRUE means we
// should disable the version check for sending updates.
// it is the caller's responsibility to honor the FALSE state
if (m_collector_list && param_true("ENABLE_STARTD_DAEMON_AD")) {
// disable version check, so that we will not drop the initial update when
// the version is unknown. This is necessary because for collectors, the version
// is not known util we are already committed to sending the update.
// And because of offline collectors, etc, it is very difficult to make sure
// that the initial update is always safe for older collectors. Setting
// this knob to true is how an admin tells us not to worry about older collectors.
m_collector_list->checkVersionBeforeSendingUpdates(false);
}
}


Expand Down
8 changes: 4 additions & 4 deletions src/condor_includes/condor_adtypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
typedef enum : long
{
NO_AD = -1,
STARTD_AD, // unspecified Startd ad type, "Machine" or "Slot" or "StartdDaemon" depending on context
STARTD_AD, // unspecified Startd ad type, "Machine" or "Slot" or "StartD" depending on context
SCHEDD_AD,
MASTER_AD,
GATEWAY_AD,
Expand All @@ -73,7 +73,7 @@ typedef enum : long
LICENSE_AD,
STORAGE_AD,
ANY_AD,
BOGUS_AD, // placeholder: NUM_AD_TYPES used wrongly to be here
BOGUS_AD, // NUM_AD_TYPES used wrongly to be here, now used by COLLECTOR as MULTI
CLUSTER_AD,
NEGOTIATOR_AD,
HAD_AD,
Expand All @@ -86,8 +86,8 @@ typedef enum : long
LEASE_MANAGER_AD, // placeholder: this type no longer used
DEFRAG_AD,
ACCOUNTING_AD,
SLOT_AD,
STARTDAEMON_AD,
SLOT_AD, // Explicitly a Startd slot ad, currently maps to "Machine" instead of "Slot" for backward compat
STARTDAEMON_AD, // Explicitly a Startd daemon ad
// This should *ALWAYS* be at the end of this list
NUM_AD_TYPES,
} AdTypes;
Expand Down

0 comments on commit d17f779

Please sign in to comment.