Skip to content

Commit

Permalink
Merge pull request #885 from casacore/issue882
Browse files Browse the repository at this point in the history
Added support for O_DIRECT
  • Loading branch information
tammojan committed Mar 18, 2019
2 parents 951991b + 3416a8e commit 1a6945a
Show file tree
Hide file tree
Showing 14 changed files with 473 additions and 67 deletions.
11 changes: 11 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ cmake_minimum_required (VERSION 2.6.0)
include(CheckCXXCompilerFlag)
include(CheckCCompilerFlag)
include(CheckFunctionExists)
include(CheckCXXSourceCompiles)

# fixes warnings on cmake 3.x+ For now we want the old behavior to be backwards compatible
if (POLICY CMP0048)
Expand Down Expand Up @@ -50,6 +51,15 @@ option (ENABLE_SHARED "Build shared libraries" YES)

option (ENABLE_RPATH "Include rpath in executables and shared libraries" YES)

# Check if O_DIRECT is available.
# Note this does not work well with check_c_source_compiles.
check_cxx_source_compiles("
#include <fcntl.h>
main() { int i = O_DIRECT; }
" HAVE_O_DIRECT)
if (HAVE_O_DIRECT)
add_definitions(-DHAVE_O_DIRECT)
endif()

# By default do not use ADIOS2, HDF5, FFTW3
option (ENABLE_TABLELOCKING "Make locking for concurrent table access possible" YES)
Expand Down Expand Up @@ -537,6 +547,7 @@ message (STATUS "USE_THREADS ........... = ${USE_THREADS}")
message (STATUS "USE_OPENMP ............ = ${USE_OPENMP}")
message (STATUS "USE_MPI ............... = ${USE_MPI}")
message (STATUS "USE_STACKTRACE ........ = ${USE_STACKTRACE}")
message (STATUS "HAVE_O_DIRECT ......... = ${HAVE_O_DIRECT}")
message (STATUS "CMAKE_CXX_COMPILER .... = ${CMAKE_CXX_COMPILER}")
message (STATUS "CMAKE_CXX_FLAGS ....... = ${CMAKE_CXX_FLAGS}")
message (STATUS "DATA directory ........ = ${DATA_DIR}")
Expand Down
22 changes: 14 additions & 8 deletions casa/IO/MultiFile.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@
namespace casacore { //# NAMESPACE CASACORE - BEGIN

MultiFile::MultiFile (const String& name, ByteIO::OpenOption option,
Int blockSize)
: MultiFileBase (name, blockSize)
Int blockSize, Bool useODirect)
: MultiFileBase (name, blockSize, useODirect)
{
itsFD = RegularFileIO::openCreate (itsName, option);
itsFD = RegularFileIO::openCreate (itsName, option, itsUseODirect);
itsIO.attach (itsFD, itsName);
if (option == ByteIO::New || option == ByteIO::NewNoReplace) {
// New file; first block is for administration.
Expand Down Expand Up @@ -78,7 +78,7 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
return;
}
// First try if the file can be opened as read/write.
int fd = RegularFileIO::openCreate (itsName, ByteIO::Update);
int fd = RegularFileIO::openCreate (itsName, ByteIO::Update, itsUseODirect);
// Now close the readonly file and reset fd.
FiledesIO::close (itsFD);
itsIO.detach();
Expand Down Expand Up @@ -112,8 +112,15 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
uChar* buf = const_cast<uChar*>(mio.getBuffer());
CanonicalConversion::fromLocal (buf, todo); // header size
// Write the first part of the buffer at the beginning of the file.
// Use an aligned buffer for possible O_DIRECT alignment.
itsIO.seek (0);
itsIO.write (itsBlockSize, buf);
if (itsUseODirect) {
MultiFileBuffer mfbuf(itsBlockSize, itsUseODirect);
memcpy (mfbuf.data, buf, itsBlockSize);
itsIO.pwrite (itsBlockSize, 0, mfbuf.data);
} else {
itsIO.pwrite (itsBlockSize, 0, buf);
}
todo -= itsBlockSize;
if (todo > 0) {
// The rest is written in another file. If the header info was written
Expand Down Expand Up @@ -168,7 +175,7 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
iter!=itsInfo.end(); ++iter) {
iter->curBlock = -1;
iter->dirty = False;
iter->buffer.resize (itsBlockSize);
iter->allocBuffer (itsBlockSize, itsUseODirect);
}
}

Expand Down Expand Up @@ -207,8 +214,7 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
void MultiFile::readBlock (MultiFileInfo& info, Int64 blknr,
void* buffer)
{
itsIO.pread (itsBlockSize, info.blockNrs[blknr] *
itsBlockSize, buffer);
itsIO.pread (itsBlockSize, info.blockNrs[blknr] * itsBlockSize, buffer);
}

void MultiFile::writeBlock (MultiFileInfo& info, Int64 blknr,
Expand Down
9 changes: 7 additions & 2 deletions casa/IO/MultiFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,14 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
// reduce the number of open files (especially when concatenating tables).
// <br>A secondary goal is offering the ability to use an IO buffer size
// that matches the file system well (large buffer size for e.g. ZFS).
// <br>A third goal is offering the ability to use O_DIRECT (if supported by OS)
// to tell the OS kernel to bypass its file cache. It makes the I/O behaviour
// more predictable which a real-time system might need.
//
// The SetupNewTable constructor has a StorageOption argument to define
// if a MultiFile has to be used and if so, the buffer size to use.
// It is also possible to specify that through aipsrc variables.
//

// A virtual file is spread over multiple (fixed size) data blocks in the
// MultiFile. A data block is never shared by multiple files.
// For each virtual file MultiFile keeps a MultiFileInfo object telling
Expand Down Expand Up @@ -111,7 +113,10 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
// Open or create a MultiFile with the given name.
// Upon creation the block size can be given. If 0, it uses the block size
// of the file system the file is on.
MultiFile (const String& name, ByteIO::OpenOption, Int blockSize=0);
// If useODirect=True, the O_DIRECT flag in used (if supported). It tells the
// kernel to bypass its file cache to have more predictable I/O behaviour.
MultiFile (const String& name, ByteIO::OpenOption, Int blockSize=0,
Bool useODirect=False);

// The destructor flushes and closes the file.
virtual ~MultiFile();
Expand Down
76 changes: 58 additions & 18 deletions casa/IO/MultiFileBase.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,10 @@
#include <casacore/casa/BasicSL/STLIO.h>
#include <casacore/casa/Utilities/Assert.h>
#include <casacore/casa/Exceptions/Error.h>
#include <casacore/casa/OS/File.h> // for fileFSTAT
#include <casacore/casa/OS/File.h> // for fileSTAT
#include <sys/stat.h> // needed for stat or stat64
#include <string.h>
#include <stdlib.h> // for posix_memalign

namespace casacore { //# NAMESPACE CASACORE - BEGIN

Expand All @@ -46,12 +47,18 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
{ ios >> info.name >> info.blockNrs >> info.fsize; }


MultiFileBase::MultiFileBase (const String& name, Int blockSize)
MultiFileBase::MultiFileBase (const String& name, Int blockSize, Bool useODirect)
: itsBlockSize (blockSize),
itsNrBlock (0),
itsHdrCounter (0),
itsUseODirect (useODirect),
itsWritable (False), // usually reset by derived class
itsChanged (False)
{
// Unset itsUseODirect if the OS does not support it.
#ifndef HAVE_O_DIRECT
itsUseODirect = False;
#endif
itsName = Path(name).expandedName();
}

Expand Down Expand Up @@ -111,6 +118,7 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
}
char* buffer = static_cast<char*>(buf);
MultiFileInfo& info = itsInfo[fileId];
char* infoBuffer = info.buffer->data;
// Determine the logical block to read and the start offset in that block.
Int64 nrblk = (info.fsize + itsBlockSize - 1) / itsBlockSize;
Int64 blknr = offset/itsBlockSize;
Expand All @@ -123,19 +131,19 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
Int64 todo = std::min(szdo-done, itsBlockSize-start);
// If already in buffer, copy from there.
if (blknr == info.curBlock) {
memcpy (buffer, &(info.buffer[start]), todo);
memcpy (buffer, infoBuffer+start, todo);
} else {
// Read directly into buffer if it fits exactly.
if (todo == itsBlockSize) {
// Read directly into buffer if it fits exactly and no O_DIRECT.
if (todo == itsBlockSize && !itsUseODirect) {
readBlock (info, blknr, buffer);
} else {
if (info.dirty) {
writeDirty (info);
}
// Read into file buffer and copy correct part.
readBlock (info, blknr, &(info.buffer[0]));
readBlock (info, blknr, infoBuffer);
info.curBlock = blknr;
memcpy (buffer, &(info.buffer[start]), todo);
memcpy (buffer, infoBuffer+start, todo);
}
}
// Increment counters.
Expand All @@ -156,6 +164,7 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
const char* buffer = static_cast<const char*>(buf);
AlwaysAssert (itsWritable, AipsError);
MultiFileInfo& info = itsInfo[fileId];
char* infoBuffer = info.buffer->data;
// Determine the logical block to write and the start offset in that block.
Int64 blknr = offset/itsBlockSize;
Int64 start = offset - blknr*itsBlockSize;
Expand All @@ -172,13 +181,13 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
Int64 todo = std::min(size-done, itsBlockSize-start);
// Favor sequential writing, thus write current buffer first.
if (blknr == info.curBlock) {
memcpy (&(info.buffer[start]), buffer, todo);
memcpy (infoBuffer+start, buffer, todo);
info.dirty = True;
if (done+todo > size) {
writeDirty (info);
}
} else if (todo == itsBlockSize) {
// Write directly from buffer if it fits exactly.
} else if (todo == itsBlockSize && !itsUseODirect) {
// Write directly from buffer if it fits exactly and no O_DIRECT.
writeBlock (info, blknr, buffer);
} else {
// Write into temporary buffer and copy correct part.
Expand All @@ -187,12 +196,12 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
writeDirty (info);
}
if (blknr >= curnrb) {
memset (&(info.buffer[0]), 0, itsBlockSize);
memset (infoBuffer, 0, itsBlockSize);
} else {
readBlock (info, blknr, &(info.buffer[0]));
readBlock (info, blknr, infoBuffer);
}
info.curBlock = blknr;
memcpy (&(info.buffer[start]), buffer, todo);
memcpy (infoBuffer+start, buffer, todo);
info.dirty = True;
}
done += todo;
Expand Down Expand Up @@ -242,7 +251,7 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
if (inx == itsInfo.size()) {
itsInfo.resize (inx+1);
}
itsInfo[inx] = MultiFileInfo(itsBlockSize);
itsInfo[inx] = MultiFileInfo(itsBlockSize, itsUseODirect);
itsInfo[inx].name = bname;
doAddFile (itsInfo[inx]);
itsChanged = True;
Expand Down Expand Up @@ -278,13 +287,44 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
}



MultiFileInfo::MultiFileInfo (Int64 bufSize)
MultiFileInfo::MultiFileInfo (Int64 bufSize, Bool useODirect)
: curBlock (-1),
fsize (0),
dirty (False)
dirty (False),
buffer (new MultiFileBuffer (bufSize, useODirect))
{}


MultiFileBuffer::MultiFileBuffer (size_t bufSize, Bool useODirect)
: data (0)
{
buffer.resize (bufSize);
const size_t align = 4096;
// Free buffer (in case used).
if (data) {
free (data);
data = 0;
}
if (bufSize > 0) {
if (useODirect && bufSize%align != 0) {
throw AipsError("MultiFile bufsize " + String::toString(bufSize) +
" must be a multiple of 4096 when using O_DIRECT");
}
// Note that the error messages do a malloc as well, but small
// compared to the requested malloc, so they'll probably succeed.
void* ptr;
if (useODirect) {
if (posix_memalign (&ptr, align, bufSize) != 0) {
throw AllocError("MultiFileBuffer: failed to allocate aligned buffer",
bufSize);
}
} else {
ptr = malloc (bufSize);
if (!ptr) {
throw AllocError("MultiFileBuffer: failed to allocate buffer", bufSize);
}
}
data = static_cast<char*>(ptr);
}
}


Expand Down
53 changes: 46 additions & 7 deletions casa/IO/MultiFileBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,17 +46,47 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN


// <summary>
// Helper class for MultiFileBase containing info per internal file
// Helper class for MultiFileInfo holding a data buffer
// </summary>
// <synopsis>
// The buffer can be allocated with posix_memalign (for O_DIRECT support).
// Hence the memory must be freed using free, which makes it impossible
// to use a shared_ptr to that memory. Hence it is encapsulated in this class.
// </synopsis>
struct MultiFileBuffer {
MultiFileBuffer (size_t bufSize, Bool useODirect);
~MultiFileBuffer()
{ if (data) free (data); }
// Data member
char* data;
private:
MultiFileBuffer (const MultiFileBuffer&);
MultiFileBuffer& operator= (const MultiFileBuffer&);
};

// <summary>
// Helper class for MultiFileBase containing info per internal file.
// </summary>
// <synopsis>
// This struct defines the various fields describing a logical file in a
// class derived from MultiFileBase (such as MultiFile or MultiHDF5).
// </synopsis>
// <use visibility=local>
struct MultiFileInfo {
explicit MultiFileInfo (Int64 bufSize=0);
// Initialize the object and create the buffer with the proper size.
// If align>1 (for use of O_DIRECT), the buffer is properly aligned and it
// is ensured that its size is a multiple of the alignment.
explicit MultiFileInfo (Int64 bufSize=0, Bool useODirect=False);
// Allocate the buffer.
void allocBuffer (Int64 bufSize, Bool useODirect=False)
{ buffer = std::shared_ptr<MultiFileBuffer> (new MultiFileBuffer(bufSize, useODirect)); }
//# Data members.
vector<Int64> blockNrs; // physical blocknrs for this logical file
vector<char> buffer; // buffer holding a data block
Int64 curBlock; // the data block held in buffer (<0 is none)
Int64 fsize; // file size (in bytes)
String name; // the virtual file name
Bool dirty; // has data in buffer been changed?
std::shared_ptr<MultiFileBuffer> buffer; // buffer holding a data block
CountedPtr<HDF5Group> group;
CountedPtr<HDF5DataSet> dataSet;
};
Expand Down Expand Up @@ -137,7 +167,10 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
// Open or create a MultiFileBase with the given name.
// Upon creation the block size can be given. If 0, it uses the block size
// of the file system the file is on.
MultiFileBase (const String& name, Int blockSize=0);
// If useODIrect=True, it means that O_DIRECT is used. If the OS does not
// support it, the flag will always be False. If True, the data buffers will
// have a proper alignment and size (as needed by O_DIRECT).
MultiFileBase (const String& name, Int blockSize, Bool useODirect);

// The destructor flushes and closes the file.
virtual ~MultiFileBase();
Expand Down Expand Up @@ -185,6 +218,10 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
Bool isWritable() const
{ return itsWritable; }

// Will O_DIRECT be used?
Bool useODirect() const
{ return itsUseODirect; }

// Get the block size used.
Int64 blockSize() const
{ return itsBlockSize; }
Expand All @@ -207,7 +244,7 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
private:
void writeDirty (MultiFileInfo& info)
{
writeBlock (info, info.curBlock, &(info.buffer[0]));
writeBlock (info, info.curBlock, info.buffer->data);
info.dirty = False;
}

Expand Down Expand Up @@ -243,8 +280,10 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
Int64 itsNrBlock; // The total nr of blocks actually used
Int64 itsHdrCounter; // Counter of header changes
vector<MultiFileInfo> itsInfo;
Bool itsWritable; // Is the file writable?
Bool itsChanged; // Has header info changed since last flush?
std::shared_ptr<MultiFileBuffer> itsBuffer;
Bool itsUseODirect; // use O_DIRECT?
Bool itsWritable; // Is the file writable?
Bool itsChanged; // Has header info changed since last flush?
vector<Int64> itsFreeBlocks;
};

Expand Down
2 changes: 1 addition & 1 deletion casa/IO/MultiHDF5.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN

MultiHDF5::MultiHDF5 (const String& name, ByteIO::OpenOption option,
Int blockSize)
: MultiFileBase (name, blockSize),
: MultiFileBase (name, blockSize, False), //# no O_DIRECT in HDF5
itsFile (itsName, option)
{
if (option == ByteIO::New || option == ByteIO::NewNoReplace) {
Expand Down
Loading

0 comments on commit 1a6945a

Please sign in to comment.