Skip to content

Commit

Permalink
Merge pull request cms-sw#5978 from nclopezo/backport-5714-to-72X
Browse files Browse the repository at this point in the history
Backport of cms-sw#5714 to 72X: Implement timeouts for the Xrootd client.
  • Loading branch information
nclopezo committed Oct 24, 2014
2 parents ef5dd59 + f1a518e commit a02b5d1
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 4 deletions.
5 changes: 4 additions & 1 deletion IOPool/Input/src/RootInputFileSequence.cc
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,10 @@ namespace edm {
usedFallback_ = true;
std::unique_ptr<InputSource::FileOpenSentry>
sentry(inputType_ == InputType::Primary ? new InputSource::FileOpenSentry(input_, lfn_, usedFallback_) : 0);
filePtr.reset(new InputFile(gSystem->ExpandPathName(fallbackName.c_str()), " Fallback request to file ", inputType_));
std::string fallbackFullName = gSystem->ExpandPathName(fallbackName.c_str());
StorageFactory *factory = StorageFactory::get();
if (factory) {factory->activateTimeout(fallbackFullName);}
filePtr.reset(new InputFile(fallbackFullName.c_str(), " Fallback request to file ", inputType_));
}
catch (cms::Exception const& e) {
if(!skipBadFiles) {
Expand Down
25 changes: 25 additions & 0 deletions Utilities/XrdAdaptor/plugins/XrdStorageMaker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@

class XrdStorageMaker : public StorageMaker
{
private:
unsigned int timeout_ = 0;

public:
/** Open a storage object for the given URL (protocol + path), using the
@a mode bits. No temporary files are downloaded. */
Expand All @@ -21,6 +24,11 @@ class XrdStorageMaker : public StorageMaker
// completely disabled, resulting in poor performance.
EnvPutInt(NAME_READCACHESIZE, 20*1024*1024);

// XrdClient has various timeouts which vary from 3 minutes to 8 hours.
// This enforces an even default (10 minutes) more appropriate for the
// cmsRun case.
if (timeout_ <= 0) {setTimeout(600);}

StorageFactory *f = StorageFactory::get();
StorageFactory::ReadHint readHint = f->readHint();
StorageFactory::CacheHint cacheHint = f->cacheHint();
Expand Down Expand Up @@ -76,6 +84,23 @@ class XrdStorageMaker : public StorageMaker
{
EnvPutInt("DebugLevel", level);
}

virtual void setTimeout(unsigned int timeout) override
{
timeout_ = timeout;
if (timeout == 0) {return;}
EnvPutInt("ConnectTimeout", timeout/3+1); // Default 120. This should allow multiple connections to timeout before the open fails.
EnvPutInt("RequestTimeout", timeout/3+1); // Default 300. This should allow almost three requests to be performed before the transaction times out.
EnvPutInt("TransactionTimeout", timeout); // Default 28800

// Safety mechanism - if client is redirected more than 255 times in 600
// seconds, then we abort the interaction.
EnvPutInt("RedirCntTimeout", 600); // Default 36000

// Enforce some CMS defaults.
EnvPutInt("MaxRedirectcount", 32); // Default 16
EnvPutInt("ReconnectWait", 5); // Default 5
}
};

DEFINE_EDM_PLUGIN (StorageMakerFactory, XrdStorageMaker, "root");
16 changes: 16 additions & 0 deletions Utilities/XrdAdaptor/src/XrdFile.cc
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,22 @@ XrdFile::addConnection (cms::Exception &ex)
std::stringstream ss;
ss << "Current server connection: " << conn->GetCurrentUrl().GetUrl().c_str();
ex.addAdditionalInfo(ss.str());
if (conn->IsOpTimeLimitElapsed(time(NULL)))
{
ex.addAdditionalInfo("Operation timeout expired. This is possibly a temporary issue which may go away on retry.");
}
}
struct ServerResponseBody_Error * lastError = m_client->LastServerError();
if (lastError && lastError->errnum != kXR_noErrorYet)
{
std::stringstream ss;
// Note: I see no guarantee that the errmsg from the server is null-terminated.
// Hence, I'm adding the extra guarantee below.
char errmsg[4097]; errmsg[4096] = '\0';
strncpy(errmsg, lastError->errmsg, 4096);
ss << "Last server error (code=" << lastError->errnum << ")";
if (strlen(errmsg)) {ss << ": " << errmsg;}
ex.addAdditionalInfo(ss.str());
}
}

29 changes: 26 additions & 3 deletions Utilities/XrdAdaptor/src/XrdReadv.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "XrdClient/XrdClientProtocol.hh"
#include "XrdClient/XrdClientConst.hh"
#include "XrdClient/XrdClientSid.hh"
#include "XrdClient/XrdClientEnv.hh"
#include "FWCore/Utilities/interface/EDMException.h"
#include "FWCore/Utilities/interface/Likely.h"

Expand Down Expand Up @@ -70,6 +71,9 @@ XrdFile::readv (IOPosBuffer *into, IOSize n)
return read(into[0].data(), into[0].size(), into[0].offset());
}

XrdClientConn *xrdc = m_client->GetClientConn();
if (xrdc) {xrdc->SetOpTimeLimit(EnvGetLong(NAME_TRANSACTIONTIMEOUT));}

// The main challenge here is to turn the request into a form that can be
// fed to the Xrootd connection. In particular, Xrootd has a limit on the
// maximum number of chunks and the maximum size of each chunk. Hence, the
Expand Down Expand Up @@ -112,16 +116,35 @@ XrdFile::readv (IOPosBuffer *into, IOSize n)
// readv_send will also parse the response and place the data into the result_list buffers.
IOSize tmp_total_len = readv_send(result_list, *read_chunk_list, chunk_off, readv_total_len);
total_len += tmp_total_len;
if (tmp_total_len != readv_total_len)
return total_len;
if (unlikely(tmp_total_len != readv_total_len))
{
edm::Exception ex(edm::errors::FileReadError);
ex << "XrdFile::readv(name='" << m_name << "')"
<< ".size=" << n << " Chunk of " << readv_total_len << " requested but "
<< tmp_total_len << " bytes returned by server.";
ex.addContext("Calling XrdFile::readv()");
addConnection(ex);
throw ex;
}
readv_total_len = 0;
chunk_off = 0;
}
}
}
// Do the actual readv for all remaining chunks.
if (chunk_off) {
total_len += readv_send(result_list, *read_chunk_list, chunk_off, readv_total_len);
IOSize tmp_total_len = readv_send(result_list, *read_chunk_list, chunk_off, readv_total_len);
if (unlikely(tmp_total_len != readv_total_len))
{
edm::Exception ex(edm::errors::FileReadError);
ex << "XrdFile::readv(name='" << m_name << "')"
<< ".size=" << n << " Chunk of " << readv_total_len << " requested but "
<< tmp_total_len << " bytes returned by server.";
ex.addContext("Calling XrdFile::readv()");
addConnection(ex);
throw ex;
}
total_len += tmp_total_len;
}
return total_len;
}
Expand Down

0 comments on commit a02b5d1

Please sign in to comment.