Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added support for O_DIRECT #885

Merged
merged 2 commits into from
Mar 18, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
11 changes: 11 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ cmake_minimum_required (VERSION 2.6.0)
include(CheckCXXCompilerFlag)
include(CheckCCompilerFlag)
include(CheckFunctionExists)
include(CheckCXXSourceCompiles)

# fixes warnings on cmake 3.x+ For now we want the old behavior to be backwards compatible
if (POLICY CMP0048)
Expand Down Expand Up @@ -50,6 +51,15 @@ option (ENABLE_SHARED "Build shared libraries" YES)

option (ENABLE_RPATH "Include rpath in executables and shared libraries" YES)

# Check if O_DIRECT is available.
# Note this does not work well with check_c_source_compiles.
check_cxx_source_compiles("
#include <fcntl.h>
main() { int i = O_DIRECT; }
" HAVE_O_DIRECT)
if (HAVE_O_DIRECT)
add_definitions(-DHAVE_O_DIRECT)
endif()

# By default do not use ADIOS2, HDF5, FFTW3
option (ENABLE_TABLELOCKING "Make locking for concurrent table access possible" YES)
Expand Down Expand Up @@ -537,6 +547,7 @@ message (STATUS "USE_THREADS ........... = ${USE_THREADS}")
message (STATUS "USE_OPENMP ............ = ${USE_OPENMP}")
message (STATUS "USE_MPI ............... = ${USE_MPI}")
message (STATUS "USE_STACKTRACE ........ = ${USE_STACKTRACE}")
message (STATUS "HAVE_O_DIRECT ......... = ${HAVE_O_DIRECT}")
message (STATUS "CMAKE_CXX_COMPILER .... = ${CMAKE_CXX_COMPILER}")
message (STATUS "CMAKE_CXX_FLAGS ....... = ${CMAKE_CXX_FLAGS}")
message (STATUS "DATA directory ........ = ${DATA_DIR}")
Expand Down
22 changes: 14 additions & 8 deletions casa/IO/MultiFile.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@
namespace casacore { //# NAMESPACE CASACORE - BEGIN

MultiFile::MultiFile (const String& name, ByteIO::OpenOption option,
Int blockSize)
: MultiFileBase (name, blockSize)
Int blockSize, Bool useODirect)
: MultiFileBase (name, blockSize, useODirect)
{
itsFD = RegularFileIO::openCreate (itsName, option);
itsFD = RegularFileIO::openCreate (itsName, option, itsUseODirect);
itsIO.attach (itsFD, itsName);
if (option == ByteIO::New || option == ByteIO::NewNoReplace) {
// New file; first block is for administration.
Expand Down Expand Up @@ -78,7 +78,7 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
return;
}
// First try if the file can be opened as read/write.
int fd = RegularFileIO::openCreate (itsName, ByteIO::Update);
int fd = RegularFileIO::openCreate (itsName, ByteIO::Update, itsUseODirect);
// Now close the readonly file and reset fd.
FiledesIO::close (itsFD);
itsIO.detach();
Expand Down Expand Up @@ -112,8 +112,15 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
uChar* buf = const_cast<uChar*>(mio.getBuffer());
CanonicalConversion::fromLocal (buf, todo); // header size
// Write the first part of the buffer at the beginning of the file.
// Use an aligned buffer for possible O_DIRECT alignment.
itsIO.seek (0);
itsIO.write (itsBlockSize, buf);
if (itsUseODirect) {
MultiFileBuffer mfbuf(itsBlockSize, itsUseODirect);
memcpy (mfbuf.data, buf, itsBlockSize);
itsIO.pwrite (itsBlockSize, 0, mfbuf.data);
} else {
itsIO.pwrite (itsBlockSize, 0, buf);
}
todo -= itsBlockSize;
if (todo > 0) {
// The rest is written in another file. If the header info was written
Expand Down Expand Up @@ -168,7 +175,7 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
iter!=itsInfo.end(); ++iter) {
iter->curBlock = -1;
iter->dirty = False;
iter->buffer.resize (itsBlockSize);
iter->allocBuffer (itsBlockSize, itsUseODirect);
}
}

Expand Down Expand Up @@ -207,8 +214,7 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
void MultiFile::readBlock (MultiFileInfo& info, Int64 blknr,
void* buffer)
{
itsIO.pread (itsBlockSize, info.blockNrs[blknr] *
itsBlockSize, buffer);
itsIO.pread (itsBlockSize, info.blockNrs[blknr] * itsBlockSize, buffer);
}

void MultiFile::writeBlock (MultiFileInfo& info, Int64 blknr,
Expand Down
9 changes: 7 additions & 2 deletions casa/IO/MultiFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,14 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
// reduce the number of open files (especially when concatenating tables).
// <br>A secondary goal is offering the ability to use an IO buffer size
// that matches the file system well (large buffer size for e.g. ZFS).
// <br>A third goal is offering the ability to use O_DIRECT (if supported by OS)
// to tell the OS kernel to bypass its file cache. It makes the I/O behaviour
// more predictable which a real-time system might need.
//
// The SetupNewTable constructor has a StorageOption argument to define
// if a MultiFile has to be used and if so, the buffer size to use.
// It is also possible to specify that through aipsrc variables.
//

// A virtual file is spread over multiple (fixed size) data blocks in the
// MultiFile. A data block is never shared by multiple files.
// For each virtual file MultiFile keeps a MultiFileInfo object telling
Expand Down Expand Up @@ -111,7 +113,10 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
// Open or create a MultiFile with the given name.
// Upon creation the block size can be given. If 0, it uses the block size
// of the file system the file is on.
MultiFile (const String& name, ByteIO::OpenOption, Int blockSize=0);
// If useODirect=True, the O_DIRECT flag in used (if supported). It tells the
// kernel to bypass its file cache to have more predictable I/O behaviour.
MultiFile (const String& name, ByteIO::OpenOption, Int blockSize=0,
Bool useODirect=False);

// The destructor flushes and closes the file.
virtual ~MultiFile();
Expand Down
76 changes: 58 additions & 18 deletions casa/IO/MultiFileBase.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,10 @@
#include <casacore/casa/BasicSL/STLIO.h>
#include <casacore/casa/Utilities/Assert.h>
#include <casacore/casa/Exceptions/Error.h>
#include <casacore/casa/OS/File.h> // for fileFSTAT
#include <casacore/casa/OS/File.h> // for fileSTAT
#include <sys/stat.h> // needed for stat or stat64
#include <string.h>
#include <stdlib.h> // for posix_memalign

namespace casacore { //# NAMESPACE CASACORE - BEGIN

Expand All @@ -46,12 +47,18 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
{ ios >> info.name >> info.blockNrs >> info.fsize; }


MultiFileBase::MultiFileBase (const String& name, Int blockSize)
MultiFileBase::MultiFileBase (const String& name, Int blockSize, Bool useODirect)
: itsBlockSize (blockSize),
itsNrBlock (0),
itsHdrCounter (0),
itsUseODirect (useODirect),
itsWritable (False), // usually reset by derived class
itsChanged (False)
{
// Unset itsUseODirect if the OS does not support it.
#ifndef HAVE_O_DIRECT
itsUseODirect = False;
#endif
itsName = Path(name).expandedName();
}

Expand Down Expand Up @@ -111,6 +118,7 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
}
char* buffer = static_cast<char*>(buf);
MultiFileInfo& info = itsInfo[fileId];
char* infoBuffer = info.buffer->data;
// Determine the logical block to read and the start offset in that block.
Int64 nrblk = (info.fsize + itsBlockSize - 1) / itsBlockSize;
Int64 blknr = offset/itsBlockSize;
Expand All @@ -123,19 +131,19 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
Int64 todo = std::min(szdo-done, itsBlockSize-start);
// If already in buffer, copy from there.
if (blknr == info.curBlock) {
memcpy (buffer, &(info.buffer[start]), todo);
memcpy (buffer, infoBuffer+start, todo);
} else {
// Read directly into buffer if it fits exactly.
if (todo == itsBlockSize) {
// Read directly into buffer if it fits exactly and no O_DIRECT.
if (todo == itsBlockSize && !itsUseODirect) {
readBlock (info, blknr, buffer);
} else {
if (info.dirty) {
writeDirty (info);
}
// Read into file buffer and copy correct part.
readBlock (info, blknr, &(info.buffer[0]));
readBlock (info, blknr, infoBuffer);
info.curBlock = blknr;
memcpy (buffer, &(info.buffer[start]), todo);
memcpy (buffer, infoBuffer+start, todo);
}
}
// Increment counters.
Expand All @@ -156,6 +164,7 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
const char* buffer = static_cast<const char*>(buf);
AlwaysAssert (itsWritable, AipsError);
MultiFileInfo& info = itsInfo[fileId];
char* infoBuffer = info.buffer->data;
// Determine the logical block to write and the start offset in that block.
Int64 blknr = offset/itsBlockSize;
Int64 start = offset - blknr*itsBlockSize;
Expand All @@ -172,13 +181,13 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
Int64 todo = std::min(size-done, itsBlockSize-start);
// Favor sequential writing, thus write current buffer first.
if (blknr == info.curBlock) {
memcpy (&(info.buffer[start]), buffer, todo);
memcpy (infoBuffer+start, buffer, todo);
info.dirty = True;
if (done+todo > size) {
writeDirty (info);
}
} else if (todo == itsBlockSize) {
// Write directly from buffer if it fits exactly.
} else if (todo == itsBlockSize && !itsUseODirect) {
// Write directly from buffer if it fits exactly and no O_DIRECT.
writeBlock (info, blknr, buffer);
} else {
// Write into temporary buffer and copy correct part.
Expand All @@ -187,12 +196,12 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
writeDirty (info);
}
if (blknr >= curnrb) {
memset (&(info.buffer[0]), 0, itsBlockSize);
memset (infoBuffer, 0, itsBlockSize);
} else {
readBlock (info, blknr, &(info.buffer[0]));
readBlock (info, blknr, infoBuffer);
}
info.curBlock = blknr;
memcpy (&(info.buffer[start]), buffer, todo);
memcpy (infoBuffer+start, buffer, todo);
info.dirty = True;
}
done += todo;
Expand Down Expand Up @@ -242,7 +251,7 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
if (inx == itsInfo.size()) {
itsInfo.resize (inx+1);
}
itsInfo[inx] = MultiFileInfo(itsBlockSize);
itsInfo[inx] = MultiFileInfo(itsBlockSize, itsUseODirect);
itsInfo[inx].name = bname;
doAddFile (itsInfo[inx]);
itsChanged = True;
Expand Down Expand Up @@ -278,13 +287,44 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
}



MultiFileInfo::MultiFileInfo (Int64 bufSize)
MultiFileInfo::MultiFileInfo (Int64 bufSize, Bool useODirect)
: curBlock (-1),
fsize (0),
dirty (False)
dirty (False),
buffer (new MultiFileBuffer (bufSize, useODirect))
{}


MultiFileBuffer::MultiFileBuffer (size_t bufSize, Bool useODirect)
: data (0)
{
buffer.resize (bufSize);
const size_t align = 4096;
// Free buffer (in case used).
if (data) {
free (data);
data = 0;
}
if (bufSize > 0) {
if (useODirect && bufSize%align != 0) {
throw AipsError("MultiFile bufsize " + String::toString(bufSize) +
" must be a multiple of 4096 when using O_DIRECT");
}
// Note that the error messages do a malloc as well, but small
// compared to the requested malloc, so they'll probably succeed.
void* ptr;
if (useODirect) {
if (posix_memalign (&ptr, align, bufSize) != 0) {
throw AllocError("MultiFileBuffer: failed to allocate aligned buffer",
bufSize);
}
} else {
ptr = malloc (bufSize);
if (!ptr) {
throw AllocError("MultiFileBuffer: failed to allocate buffer", bufSize);
}
}
data = static_cast<char*>(ptr);
}
}


Expand Down
53 changes: 46 additions & 7 deletions casa/IO/MultiFileBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,17 +46,47 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN


// <summary>
// Helper class for MultiFileBase containing info per internal file
// Helper class for MultiFileInfo holding a data buffer
// </summary>
// <synopsis>
// The buffer can be allocated with posix_memalign (for O_DIRECT support).
// Hence the memory must be freed using free, which makes it impossible
// to use a shared_ptr to that memory. Hence it is encapsulated in this class.
// </synopsis>
struct MultiFileBuffer {
MultiFileBuffer (size_t bufSize, Bool useODirect);
~MultiFileBuffer()
{ if (data) free (data); }
// Data member
char* data;
private:
MultiFileBuffer (const MultiFileBuffer&);
MultiFileBuffer& operator= (const MultiFileBuffer&);
};

// <summary>
// Helper class for MultiFileBase containing info per internal file.
// </summary>
// <synopsis>
// This struct defines the various fields describing a logical file in a
// class derived from MultiFileBase (such as MultiFile or MultiHDF5).
// </synopsis>
// <use visibility=local>
struct MultiFileInfo {
explicit MultiFileInfo (Int64 bufSize=0);
// Initialize the object and create the buffer with the proper size.
// If align>1 (for use of O_DIRECT), the buffer is properly aligned and it
// is ensured that its size is a multiple of the alignment.
explicit MultiFileInfo (Int64 bufSize=0, Bool useODirect=False);
// Allocate the buffer.
void allocBuffer (Int64 bufSize, Bool useODirect=False)
{ buffer = std::shared_ptr<MultiFileBuffer> (new MultiFileBuffer(bufSize, useODirect)); }
//# Data members.
vector<Int64> blockNrs; // physical blocknrs for this logical file
vector<char> buffer; // buffer holding a data block
Int64 curBlock; // the data block held in buffer (<0 is none)
Int64 fsize; // file size (in bytes)
String name; // the virtual file name
Bool dirty; // has data in buffer been changed?
std::shared_ptr<MultiFileBuffer> buffer; // buffer holding a data block
CountedPtr<HDF5Group> group;
CountedPtr<HDF5DataSet> dataSet;
};
Expand Down Expand Up @@ -137,7 +167,10 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
// Open or create a MultiFileBase with the given name.
// Upon creation the block size can be given. If 0, it uses the block size
// of the file system the file is on.
MultiFileBase (const String& name, Int blockSize=0);
// If useODIrect=True, it means that O_DIRECT is used. If the OS does not
// support it, the flag will always be False. If True, the data buffers will
// have a proper alignment and size (as needed by O_DIRECT).
MultiFileBase (const String& name, Int blockSize, Bool useODirect);

// The destructor flushes and closes the file.
virtual ~MultiFileBase();
Expand Down Expand Up @@ -185,6 +218,10 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
Bool isWritable() const
{ return itsWritable; }

// Will O_DIRECT be used?
Bool useODirect() const
{ return itsUseODirect; }

// Get the block size used.
Int64 blockSize() const
{ return itsBlockSize; }
Expand All @@ -207,7 +244,7 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
private:
void writeDirty (MultiFileInfo& info)
{
writeBlock (info, info.curBlock, &(info.buffer[0]));
writeBlock (info, info.curBlock, info.buffer->data);
info.dirty = False;
}

Expand Down Expand Up @@ -243,8 +280,10 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN
Int64 itsNrBlock; // The total nr of blocks actually used
Int64 itsHdrCounter; // Counter of header changes
vector<MultiFileInfo> itsInfo;
Bool itsWritable; // Is the file writable?
Bool itsChanged; // Has header info changed since last flush?
std::shared_ptr<MultiFileBuffer> itsBuffer;
Bool itsUseODirect; // use O_DIRECT?
Bool itsWritable; // Is the file writable?
Bool itsChanged; // Has header info changed since last flush?
vector<Int64> itsFreeBlocks;
};

Expand Down
2 changes: 1 addition & 1 deletion casa/IO/MultiHDF5.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ namespace casacore { //# NAMESPACE CASACORE - BEGIN

MultiHDF5::MultiHDF5 (const String& name, ByteIO::OpenOption option,
Int blockSize)
: MultiFileBase (name, blockSize),
: MultiFileBase (name, blockSize, False), //# no O_DIRECT in HDF5
itsFile (itsName, option)
{
if (option == ByteIO::New || option == ByteIO::NewNoReplace) {
Expand Down