From 662c32fbfeba987f2b04c7a052c852d8e840607f Mon Sep 17 00:00:00 2001 From: Sergey Linev Date: Tue, 30 Jul 2024 14:24:02 +0200 Subject: [PATCH 01/37] [web] fix typo in tgeomanager painter creation --- geom/geom/src/TGeoManager.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/geom/geom/src/TGeoManager.cxx b/geom/geom/src/TGeoManager.cxx index 77c05e0d085e8..c79e072100e2b 100644 --- a/geom/geom/src/TGeoManager.cxx +++ b/geom/geom/src/TGeoManager.cxx @@ -2917,7 +2917,7 @@ TVirtualGeoPainter *TGeoManager::GetGeomPainter() { if (!fPainter) { const char *kind = gEnv->GetValue("GeomPainter.Name", ""); - if (!kind || !*kind); + if (!kind || !*kind) kind = (gROOT->IsWebDisplay() && !gROOT->IsWebDisplayBatch()) ? "web" : "root"; if (auto h = gROOT->GetPluginManager()->FindHandler("TVirtualGeoPainter", kind)) { if (h->LoadPlugin() == -1) { From 681c6bdca5c9b78d8a10d58fb7175ead43a9b1b3 Mon Sep 17 00:00:00 2001 From: Sergey Linev Date: Tue, 30 Jul 2024 14:29:03 +0200 Subject: [PATCH 02/37] [jsroot] dev 30/07/2024 Fix zooming on haxis --- js/build/jsroot.js | 29 +++++++++++++++++++---------- js/modules/core.mjs | 2 +- js/modules/gpad/TFramePainter.mjs | 27 ++++++++++++++++++--------- 3 files changed, 38 insertions(+), 20 deletions(-) diff --git a/js/build/jsroot.js b/js/build/jsroot.js index 38e1921d7401a..8ae63a3df4689 100644 --- a/js/build/jsroot.js +++ b/js/build/jsroot.js @@ -11,7 +11,7 @@ const version_id = 'dev', /** @summary version date * @desc Release date in format day/month/year like '14/04/2022' */ -version_date = '26/07/2024', +version_date = '30/07/2024', /** @summary version id and date * @desc Produced by concatenation of {@link version_id} and {@link version_date} @@ -64433,24 +64433,33 @@ const TooltipHandler = { case 3: this.zoom_curr[1] = m[1]; changed[0] = false; break; // only Y } - const idx = this.swap_xy ? 1 : 0, idy = 1 - idx; let xmin, xmax, ymin, ymax, isany = false, namex = 'x', namey = 'y'; - if (changed[idx] && (Math.abs(this.zoom_curr[idx] - this.zoom_origin[idx]) > 10)) { - if (this.zoom_second && (this.zoom_kind === 2)) namex = 'x2'; - xmin = Math.min(this.revertAxis(namex, this.zoom_origin[idx]), this.revertAxis(namex, this.zoom_curr[idx])); - xmax = Math.max(this.revertAxis(namex, this.zoom_origin[idx]), this.revertAxis(namex, this.zoom_curr[idx])); + if (changed[0] && (Math.abs(this.zoom_curr[0] - this.zoom_origin[0]) > 5)) { + if (this.zoom_second && (this.zoom_kind === 2)) + namex = 'x2'; + const v1 = this.revertAxis(namex, this.zoom_origin[0]), + v2 = this.revertAxis(namex, this.zoom_curr[0]); + xmin = Math.min(v1, v2); + xmax = Math.max(v1, v2); isany = true; } - if (changed[idy] && (Math.abs(this.zoom_curr[idy] - this.zoom_origin[idy]) > 10)) { - if (this.zoom_second && (this.zoom_kind === 3)) namey = 'y2'; - ymin = Math.min(this.revertAxis(namey, this.zoom_origin[idy]), this.revertAxis(namey, this.zoom_curr[idy])); - ymax = Math.max(this.revertAxis(namey, this.zoom_origin[idy]), this.revertAxis(namey, this.zoom_curr[idy])); + if (changed[1] && (Math.abs(this.zoom_curr[1] - this.zoom_origin[1]) > 5)) { + if (this.zoom_second && (this.zoom_kind === 3)) + namey = 'y2'; + + const v1 = this.revertAxis(namey, this.zoom_origin[1]), + v2 = this.revertAxis(namey, this.zoom_curr[1]); + ymin = Math.min(v1, v2); + ymax = Math.max(v1, v2); isany = true; } + if (this.swap_xy && !this.zoom_second) + [xmin, xmax, ymin, ymax] = [ymin, ymax, xmin, xmax]; + if (namex === 'x2') { pr = this.zoomSingle(namex, xmin, xmax, true); kind = 0; diff --git a/js/modules/core.mjs b/js/modules/core.mjs index 64c52b5475f94..bb777d7a64748 100644 --- a/js/modules/core.mjs +++ b/js/modules/core.mjs @@ -4,7 +4,7 @@ const version_id = 'dev', /** @summary version date * @desc Release date in format day/month/year like '14/04/2022' */ -version_date = '26/07/2024', +version_date = '30/07/2024', /** @summary version id and date * @desc Produced by concatenation of {@link version_id} and {@link version_date} diff --git a/js/modules/gpad/TFramePainter.mjs b/js/modules/gpad/TFramePainter.mjs index 44cbc7d7438a2..9113c0731454c 100644 --- a/js/modules/gpad/TFramePainter.mjs +++ b/js/modules/gpad/TFramePainter.mjs @@ -1129,24 +1129,33 @@ const TooltipHandler = { case 3: this.zoom_curr[1] = m[1]; changed[0] = false; break; // only Y } - const idx = this.swap_xy ? 1 : 0, idy = 1 - idx; let xmin, xmax, ymin, ymax, isany = false, namex = 'x', namey = 'y'; - if (changed[idx] && (Math.abs(this.zoom_curr[idx] - this.zoom_origin[idx]) > 10)) { - if (this.zoom_second && (this.zoom_kind === 2)) namex = 'x2'; - xmin = Math.min(this.revertAxis(namex, this.zoom_origin[idx]), this.revertAxis(namex, this.zoom_curr[idx])); - xmax = Math.max(this.revertAxis(namex, this.zoom_origin[idx]), this.revertAxis(namex, this.zoom_curr[idx])); + if (changed[0] && (Math.abs(this.zoom_curr[0] - this.zoom_origin[0]) > 5)) { + if (this.zoom_second && (this.zoom_kind === 2)) + namex = 'x2'; + const v1 = this.revertAxis(namex, this.zoom_origin[0]), + v2 = this.revertAxis(namex, this.zoom_curr[0]); + xmin = Math.min(v1, v2); + xmax = Math.max(v1, v2); isany = true; } - if (changed[idy] && (Math.abs(this.zoom_curr[idy] - this.zoom_origin[idy]) > 10)) { - if (this.zoom_second && (this.zoom_kind === 3)) namey = 'y2'; - ymin = Math.min(this.revertAxis(namey, this.zoom_origin[idy]), this.revertAxis(namey, this.zoom_curr[idy])); - ymax = Math.max(this.revertAxis(namey, this.zoom_origin[idy]), this.revertAxis(namey, this.zoom_curr[idy])); + if (changed[1] && (Math.abs(this.zoom_curr[1] - this.zoom_origin[1]) > 5)) { + if (this.zoom_second && (this.zoom_kind === 3)) + namey = 'y2'; + + const v1 = this.revertAxis(namey, this.zoom_origin[1]), + v2 = this.revertAxis(namey, this.zoom_curr[1]); + ymin = Math.min(v1, v2); + ymax = Math.max(v1, v2); isany = true; } + if (this.swap_xy && !this.zoom_second) + [xmin, xmax, ymin, ymax] = [ymin, ymax, xmin, xmax]; + if (namex === 'x2') { pr = this.zoomSingle(namex, xmin, xmax, true); kind = 0; From 225c7eef4e0850ebdf9458dd3098946aaf7fe07a Mon Sep 17 00:00:00 2001 From: Sergey Linev Date: Tue, 30 Jul 2024 17:56:54 +0200 Subject: [PATCH 03/37] [geom] do not use web display in parallel_world.C macro This is very special feature, which has no equivalent in web display --- tutorials/geom/parallel_world.C | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tutorials/geom/parallel_world.C b/tutorials/geom/parallel_world.C index 5d1428e2a4a10..5a9509552ac58 100644 --- a/tutorials/geom/parallel_world.C +++ b/tutorials/geom/parallel_world.C @@ -21,6 +21,9 @@ void align(); //______________________________________________________________________________ void parallel_world(Bool_t usepw=kTRUE, Bool_t useovlp=kTRUE) { + // web geometry display does not support "parallel world" feature + gROOT->SetWebDisplay("off"); + TGeoManager *geom = new TGeoManager("parallel_world", "Showcase for prioritized physical paths"); TGeoMaterial *matV = new TGeoMaterial("Vac", 0,0,0); TGeoMedium *medV = new TGeoMedium("MEDVAC",1,matV); @@ -60,7 +63,7 @@ void parallel_world(Bool_t usepw=kTRUE, Bool_t useovlp=kTRUE) top->AddNode(chip, i+1, new TGeoTranslation(0, -225.+50.*i, 10)); gGeoManager->CloseGeometry(); - TGeoParallelWorld *pw = 0; + TGeoParallelWorld *pw = nullptr; if (usepw) pw = gGeoManager->CreateParallelWorld("priority_sensors"); // Align chips align(); @@ -85,8 +88,10 @@ void parallel_world(Bool_t usepw=kTRUE, Bool_t useovlp=kTRUE) timer.Stop(); timer.Print(); TView3D *view = (TView3D*)gPad->GetView(); - view->SetParallel(); - view->Side(); + if (view) { + view->SetParallel(); + view->Side(); + } if (usepw) pw->PrintDetectedOverlaps(); } From 351a6462dc2a14173ecb9dc05d0466e771f87152 Mon Sep 17 00:00:00 2001 From: Jonas Hahnfeld Date: Mon, 29 Jul 2024 18:58:08 +0200 Subject: [PATCH 04/37] [ntuple] Write first chunk of multi-key last This has the advantage that the offsets to all other chunks, that are logically located afterwards, are already known and can be written immediately instead of seeking back and patching them afterwards. The only downside is that the seek operation from the first to the second chunk during reading may be slightly more expensive because it is now backwards in the file, across the entire split blob. As a result of this change, RNTupleFileWriter in all cases writes into monotonically increasing offets, which simplifies further changes related to the support of Direct I/O. --- tree/ntuple/v7/src/RMiniFile.cxx | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/tree/ntuple/v7/src/RMiniFile.cxx b/tree/ntuple/v7/src/RMiniFile.cxx index e5e2bd63935e7..36c7f5701e78b 100644 --- a/tree/ntuple/v7/src/RMiniFile.cxx +++ b/tree/ntuple/v7/src/RMiniFile.cxx @@ -1523,12 +1523,9 @@ std::uint64_t ROOT::Experimental::Internal::RNTupleFileWriter::WriteBlob(const v const size_t nChunks = ComputeNumChunks(nbytes, maxKeySize); const size_t nbytesChunkOffsets = (nChunks - 1) * sizeof(std::uint64_t); const size_t nbytesFirstChunk = maxKeySize - nbytesChunkOffsets; - // Write first chunk. - // Note that the last bytes we write here will be overridden later by the other chunks' - // offsets, and we're gonna write those bytes again in the following chunk. - const std::uint64_t firstOffset = writeKey(data, maxKeySize, maxKeySize); + // Skip writing the first chunk, it will be written last (in the file) below. - data = reinterpret_cast(data) + nbytesFirstChunk; + const uint8_t *chunkData = reinterpret_cast(data) + nbytesFirstChunk; size_t remainingBytes = nbytes - nbytesFirstChunk; const auto chunkOffsetsToWrite = std::make_unique(nChunks - 1); @@ -1536,22 +1533,21 @@ std::uint64_t ROOT::Experimental::Internal::RNTupleFileWriter::WriteBlob(const v do { const size_t bytesNextChunk = std::min(remainingBytes, maxKeySize); - const std::uint64_t offset = writeKey(data, bytesNextChunk, bytesNextChunk); + const std::uint64_t offset = writeKey(chunkData, bytesNextChunk, bytesNextChunk); RNTupleSerializer::SerializeUInt64(offset, &chunkOffsetsToWrite[chunkOffsetIdx]); ++chunkOffsetIdx; remainingBytes -= bytesNextChunk; - data = reinterpret_cast(data) + bytesNextChunk; + chunkData += bytesNextChunk; } while (remainingBytes > 0); - // patch the chunk offset into the first chunk - const std::uint64_t patchOffset = firstOffset + nbytesFirstChunk; - if (fFileSimple) - fFileSimple.Write(chunkOffsetsToWrite.get(), nbytesChunkOffsets, patchOffset); - else - fFileProper.Write(chunkOffsetsToWrite.get(), nbytesChunkOffsets, patchOffset); + // Write the first key, with part of the data and the pointers to (logically) following keys appended. + const std::uint64_t firstOffset = ReserveBlob(maxKeySize, maxKeySize); + WriteIntoReservedBlob(data, nbytesFirstChunk, firstOffset); + const std::uint64_t chunkOffsetsOffset = firstOffset + nbytesFirstChunk; + WriteIntoReservedBlob(chunkOffsetsToWrite.get(), nbytesChunkOffsets, chunkOffsetsOffset); return firstOffset; } From 690f4c1827eaedc221b02b418eb62c3913da44c4 Mon Sep 17 00:00:00 2001 From: Jonas Hahnfeld Date: Wed, 31 Jul 2024 11:48:06 +0200 Subject: [PATCH 05/37] [ntuple] Refactor fseek implementation --- tree/ntuple/v7/src/RMiniFile.cxx | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tree/ntuple/v7/src/RMiniFile.cxx b/tree/ntuple/v7/src/RMiniFile.cxx index 36c7f5701e78b..d1224f247e829 100644 --- a/tree/ntuple/v7/src/RMiniFile.cxx +++ b/tree/ntuple/v7/src/RMiniFile.cxx @@ -1305,17 +1305,24 @@ ROOT::Experimental::Internal::RNTupleFileWriter::RFileSimple::~RFileSimple() fclose(fFile); } +namespace { +int FSeek64(FILE *stream, std::int64_t offset, int origin) +{ +#ifdef R__SEEK64 + return fseeko64(stream, offset, origin); +#else + return fseek(stream, offset, origin); +#endif +} +} // namespace + void ROOT::Experimental::Internal::RNTupleFileWriter::RFileSimple::Write(const void *buffer, size_t nbytes, std::int64_t offset) { R__ASSERT(fFile); size_t retval; if ((offset >= 0) && (static_cast(offset) != fFilePos)) { -#ifdef R__SEEK64 - retval = fseeko64(fFile, offset, SEEK_SET); -#else - retval = fseek(fFile, offset, SEEK_SET); -#endif + retval = FSeek64(fFile, offset, SEEK_SET); if (retval) throw RException(R__FAIL(std::string("Seek failed: ") + strerror(errno))); fFilePos = offset; From 0bfd511e1a417ab839b1456f1e0738dc30bb0609 Mon Sep 17 00:00:00 2001 From: Jonas Hahnfeld Date: Thu, 4 Apr 2024 16:52:26 +0200 Subject: [PATCH 06/37] [ntuple] Implement buffering in RFileSimple This has the advantage that we can precisely control the alignment, which is important to support Direct I/O. --- tree/ntuple/v7/inc/ROOT/RMiniFile.hxx | 20 ++++- tree/ntuple/v7/src/RMiniFile.cxx | 114 ++++++++++++++++++++++---- 2 files changed, 118 insertions(+), 16 deletions(-) diff --git a/tree/ntuple/v7/inc/ROOT/RMiniFile.hxx b/tree/ntuple/v7/inc/ROOT/RMiniFile.hxx index ea210275ce66c..32122f21063c7 100644 --- a/tree/ntuple/v7/inc/ROOT/RMiniFile.hxx +++ b/tree/ntuple/v7/inc/ROOT/RMiniFile.hxx @@ -110,6 +110,22 @@ private: }; struct RFileSimple { + /// Direct I/O requires that all buffers and write lengths are aligned. It seems 512 byte alignment is the minimum + /// for Direct I/O to work, but further testing showed that it results in worse performance than 4kB. + static constexpr int kBlockAlign = 4096; + /// During commit, WriteTFileKeysList() updates fNBytesKeys and fSeekKeys of the RTFFile located at + /// fSeekFileRecord. Given that the TFile key starts at offset 100 and the file name, which is written twice, + /// is shorter than 255 characters, we should need at most ~600 bytes. However, the header also needs to be + /// aligned to kBlockAlign... + static constexpr std::size_t kHeaderBlockSize = 4096; + /// Testing suggests that 4MiB gives best performance at a reasonable memory consumption. + static constexpr std::size_t kBlockSize = 4 * 1024 * 1024; + + // fHeaderBlock and fBlock are raw pointers because we have to manually call operator new and delete. + unsigned char *fHeaderBlock; + std::uint64_t fBlockOffset = 0; + unsigned char *fBlock; + /// For the simplest cases, a C file stream can be used for writing FILE *fFile = nullptr; /// Keeps track of the seek offset @@ -119,13 +135,15 @@ private: /// Keeps track of TFile control structures, which need to be updated on committing the data set std::unique_ptr fControlBlock; - RFileSimple() = default; + RFileSimple(); RFileSimple(const RFileSimple &other) = delete; RFileSimple(RFileSimple &&other) = delete; RFileSimple &operator=(const RFileSimple &other) = delete; RFileSimple &operator=(RFileSimple &&other) = delete; ~RFileSimple(); + void Flush(); + /// Writes bytes in the open stream, either at fFilePos or at the given offset void Write(const void *buffer, size_t nbytes, std::int64_t offset = -1); /// Writes a TKey including the data record, given by buffer, into fFile; returns the file offset to the payload. diff --git a/tree/ntuple/v7/src/RMiniFile.cxx b/tree/ntuple/v7/src/RMiniFile.cxx index d1224f247e829..6e46a4f7b4e05 100644 --- a/tree/ntuple/v7/src/RMiniFile.cxx +++ b/tree/ntuple/v7/src/RMiniFile.cxx @@ -1299,10 +1299,23 @@ void ROOT::Experimental::Internal::RMiniFileReader::ReadBuffer(void *buffer, siz //////////////////////////////////////////////////////////////////////////////// +ROOT::Experimental::Internal::RNTupleFileWriter::RFileSimple::RFileSimple() +{ + static_assert(kHeaderBlockSize % kBlockAlign == 0, "invalid header block size"); + static_assert(kBlockSize % kBlockAlign == 0, "invalid block size"); + std::align_val_t blockAlign{kBlockAlign}; + fHeaderBlock = static_cast(::operator new[](kHeaderBlockSize, blockAlign)); + fBlock = static_cast(::operator new[](kBlockSize, blockAlign)); +} + ROOT::Experimental::Internal::RNTupleFileWriter::RFileSimple::~RFileSimple() { if (fFile) fclose(fFile); + + std::align_val_t blockAlign{kBlockAlign}; + ::operator delete[](fHeaderBlock, blockAlign); + ::operator delete[](fBlock, blockAlign); } namespace { @@ -1316,21 +1329,88 @@ int FSeek64(FILE *stream, std::int64_t offset, int origin) } } // namespace +void ROOT::Experimental::Internal::RNTupleFileWriter::RFileSimple::Flush() +{ + // Write the last partially filled block. + // If it is the first block, get the updated header block. + if (fBlockOffset == 0) { + std::size_t headerBlockSize = kHeaderBlockSize; + if (headerBlockSize > fFilePos) { + headerBlockSize = fFilePos; + } + memcpy(fBlock, fHeaderBlock, headerBlockSize); + } + + std::size_t retval = FSeek64(fFile, fBlockOffset, SEEK_SET); + if (retval) + throw RException(R__FAIL(std::string("Seek failed: ") + strerror(errno))); + + std::size_t lastBlockSize = fFilePos - fBlockOffset; + R__ASSERT(lastBlockSize <= kBlockSize); + retval = fwrite(fBlock, 1, lastBlockSize, fFile); + if (retval != lastBlockSize) + throw RException(R__FAIL(std::string("write failed: ") + strerror(errno))); + + // Write the (updated) header block, unless it was part of the write above. + if (fBlockOffset > 0) { + retval = FSeek64(fFile, 0, SEEK_SET); + if (retval) + throw RException(R__FAIL(std::string("Seek failed: ") + strerror(errno))); + + retval = fwrite(fHeaderBlock, 1, kHeaderBlockSize, fFile); + if (retval != RFileSimple::kHeaderBlockSize) + throw RException(R__FAIL(std::string("write failed: ") + strerror(errno))); + } + + fflush(fFile); +} + void ROOT::Experimental::Internal::RNTupleFileWriter::RFileSimple::Write(const void *buffer, size_t nbytes, std::int64_t offset) { R__ASSERT(fFile); size_t retval; if ((offset >= 0) && (static_cast(offset) != fFilePos)) { - retval = FSeek64(fFile, offset, SEEK_SET); - if (retval) - throw RException(R__FAIL(std::string("Seek failed: ") + strerror(errno))); fFilePos = offset; } - retval = fwrite(buffer, 1, nbytes, fFile); - if (retval != nbytes) - throw RException(R__FAIL(std::string("write failed: ") + strerror(errno))); - fFilePos += nbytes; + + // Keep header block to overwrite on commit. + if (fFilePos < kHeaderBlockSize) { + std::size_t headerBytes = nbytes; + if (fFilePos + headerBytes > kHeaderBlockSize) { + headerBytes = kHeaderBlockSize - fFilePos; + } + memcpy(fHeaderBlock + fFilePos, buffer, headerBytes); + } + + R__ASSERT(fFilePos >= fBlockOffset); + + while (nbytes > 0) { + std::uint64_t posInBlock = fFilePos % kBlockSize; + std::uint64_t blockOffset = fFilePos - posInBlock; + if (blockOffset != fBlockOffset) { + // Write the block. + retval = FSeek64(fFile, fBlockOffset, SEEK_SET); + if (retval) + throw RException(R__FAIL(std::string("Seek failed: ") + strerror(errno))); + + retval = fwrite(fBlock, 1, kBlockSize, fFile); + if (retval != kBlockSize) + throw RException(R__FAIL(std::string("write failed: ") + strerror(errno))); + + // TODO: do we want to null the buffer contents? + } + + fBlockOffset = blockOffset; + std::size_t blockSize = nbytes; + if (blockSize > kBlockSize - posInBlock) { + blockSize = kBlockSize - posInBlock; + } + memcpy(fBlock + posInBlock, buffer, blockSize); + buffer = static_cast(buffer) + blockSize; + nbytes -= blockSize; + fFilePos += blockSize; + } } std::uint64_t ROOT::Experimental::Internal::RNTupleFileWriter::RFileSimple::WriteKey( @@ -1468,11 +1548,12 @@ void ROOT::Experimental::Internal::RNTupleFileWriter::Commit() if (fIsBare) { RTFNTuple ntupleOnDisk(fNTupleAnchor); - fFileSimple.Write(&ntupleOnDisk, ntupleOnDisk.GetSize(), fFileSimple.fControlBlock->fSeekNTuple); - // Append the checksum + // Compute the checksum std::uint64_t checksum = XXH3_64bits(ntupleOnDisk.GetPtrCkData(), ntupleOnDisk.GetSizeCkData()); - fFileSimple.Write(&checksum, sizeof(checksum)); - fflush(fFileSimple.fFile); + memcpy(fFileSimple.fHeaderBlock + fFileSimple.fControlBlock->fSeekNTuple, &ntupleOnDisk, ntupleOnDisk.GetSize()); + memcpy(fFileSimple.fHeaderBlock + fFileSimple.fControlBlock->fSeekNTuple + ntupleOnDisk.GetSize(), &checksum, + sizeof(checksum)); + fFileSimple.Flush(); return; } @@ -1482,10 +1563,13 @@ void ROOT::Experimental::Internal::RNTupleFileWriter::Commit() WriteTFileFreeList(); // Update header and TFile record - fFileSimple.Write(&fFileSimple.fControlBlock->fHeader, fFileSimple.fControlBlock->fHeader.GetSize(), 0); - fFileSimple.Write(&fFileSimple.fControlBlock->fFileRecord, fFileSimple.fControlBlock->fFileRecord.GetSize(), - fFileSimple.fControlBlock->fSeekFileRecord); - fflush(fFileSimple.fFile); + memcpy(fFileSimple.fHeaderBlock, &fFileSimple.fControlBlock->fHeader, fFileSimple.fControlBlock->fHeader.GetSize()); + R__ASSERT(fFileSimple.fControlBlock->fSeekFileRecord + fFileSimple.fControlBlock->fFileRecord.GetSize() < + RFileSimple::kHeaderBlockSize); + memcpy(fFileSimple.fHeaderBlock + fFileSimple.fControlBlock->fSeekFileRecord, + &fFileSimple.fControlBlock->fFileRecord, fFileSimple.fControlBlock->fFileRecord.GetSize()); + + fFileSimple.Flush(); } std::uint64_t ROOT::Experimental::Internal::RNTupleFileWriter::WriteBlob(const void *data, size_t nbytes, size_t len) From 2096db630eb1b579d99f5d5c89de917acb631dc1 Mon Sep 17 00:00:00 2001 From: Jonas Hahnfeld Date: Thu, 4 Apr 2024 17:19:28 +0200 Subject: [PATCH 07/37] [ntuple] Disable C stdio buffering in RNTupleFileWriter RNTupleFileWriter::RFileSimple does its own buffering now. --- tree/ntuple/v7/src/RMiniFile.cxx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tree/ntuple/v7/src/RMiniFile.cxx b/tree/ntuple/v7/src/RMiniFile.cxx index 6e46a4f7b4e05..1231b7b579b34 100644 --- a/tree/ntuple/v7/src/RMiniFile.cxx +++ b/tree/ntuple/v7/src/RMiniFile.cxx @@ -1508,6 +1508,8 @@ ROOT::Experimental::Internal::RNTupleFileWriter::Recreate(std::string_view ntupl FILE *fileStream = fopen(std::string(path.data(), path.size()).c_str(), "wb"); #endif R__ASSERT(fileStream); + // RNTupleFileWriter::RFileSimple does its own buffering, turn off additional buffering from C stdio. + std::setvbuf(fileStream, nullptr, _IONBF, 0); auto writer = std::unique_ptr(new RNTupleFileWriter(ntupleName, maxKeySize)); writer->fFileSimple.fFile = fileStream; From 92b59dd12e86db36eadf54add27c4a4f98cfbfbf Mon Sep 17 00:00:00 2001 From: Jonas Hahnfeld Date: Tue, 30 Jul 2024 11:26:35 +0200 Subject: [PATCH 08/37] [ntuple] Pass RNTupleWriteOptions into RNTupleFileWriter --- tree/ntuple/v7/inc/ROOT/RMiniFile.hxx | 6 ++-- tree/ntuple/v7/src/RMiniFile.cxx | 8 +++-- tree/ntuple/v7/src/RPageStorageFile.cxx | 3 +- tree/ntuple/v7/test/ntuple_compat.cxx | 4 +-- tree/ntuple/v7/test/ntuple_minifile.cxx | 44 +++++++++++++++---------- 5 files changed, 39 insertions(+), 26 deletions(-) diff --git a/tree/ntuple/v7/inc/ROOT/RMiniFile.hxx b/tree/ntuple/v7/inc/ROOT/RMiniFile.hxx index 32122f21063c7..d800e5a7663b3 100644 --- a/tree/ntuple/v7/inc/ROOT/RMiniFile.hxx +++ b/tree/ntuple/v7/inc/ROOT/RMiniFile.hxx @@ -37,6 +37,8 @@ class RRawFile; namespace Experimental { +class RNTupleWriteOptions; + namespace Internal { /// Holds status information of an open ROOT file during writing struct RTFileControlBlock; @@ -194,8 +196,8 @@ public: /// Create or truncate the local file given by path with the new empty RNTuple identified by ntupleName. /// Uses a C stream for writing static std::unique_ptr Recreate(std::string_view ntupleName, std::string_view path, - int defaultCompression, EContainerFormat containerFormat, - std::uint64_t maxKeySize); + EContainerFormat containerFormat, + const RNTupleWriteOptions &options); /// Add a new RNTuple identified by ntupleName to the existing TFile. static std::unique_ptr Append(std::string_view ntupleName, TFile &file, std::uint64_t maxKeySize); diff --git a/tree/ntuple/v7/src/RMiniFile.cxx b/tree/ntuple/v7/src/RMiniFile.cxx index 1231b7b579b34..e9e816fcda61b 100644 --- a/tree/ntuple/v7/src/RMiniFile.cxx +++ b/tree/ntuple/v7/src/RMiniFile.cxx @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -1494,8 +1495,8 @@ ROOT::Experimental::Internal::RNTupleFileWriter::~RNTupleFileWriter() {} std::unique_ptr ROOT::Experimental::Internal::RNTupleFileWriter::Recreate(std::string_view ntupleName, std::string_view path, - int defaultCompression, EContainerFormat containerFormat, - std::uint64_t maxKeySize) + EContainerFormat containerFormat, + const RNTupleWriteOptions &options) { std::string fileName(path); size_t idxDirSep = fileName.find_last_of("\\/"); @@ -1511,10 +1512,11 @@ ROOT::Experimental::Internal::RNTupleFileWriter::Recreate(std::string_view ntupl // RNTupleFileWriter::RFileSimple does its own buffering, turn off additional buffering from C stdio. std::setvbuf(fileStream, nullptr, _IONBF, 0); - auto writer = std::unique_ptr(new RNTupleFileWriter(ntupleName, maxKeySize)); + auto writer = std::unique_ptr(new RNTupleFileWriter(ntupleName, options.GetMaxKeySize())); writer->fFileSimple.fFile = fileStream; writer->fFileName = fileName; + int defaultCompression = options.GetCompression(); switch (containerFormat) { case EContainerFormat::kTFile: writer->WriteTFileSkeleton(defaultCompression); break; case EContainerFormat::kBare: diff --git a/tree/ntuple/v7/src/RPageStorageFile.cxx b/tree/ntuple/v7/src/RPageStorageFile.cxx index 5ce16327daf6b..74c6a79e2259b 100644 --- a/tree/ntuple/v7/src/RPageStorageFile.cxx +++ b/tree/ntuple/v7/src/RPageStorageFile.cxx @@ -62,8 +62,7 @@ ROOT::Experimental::Internal::RPageSinkFile::RPageSinkFile(std::string_view ntup const RNTupleWriteOptions &options) : RPageSinkFile(ntupleName, options) { - fWriter = RNTupleFileWriter::Recreate(ntupleName, path, options.GetCompression(), - RNTupleFileWriter::EContainerFormat::kTFile, options.GetMaxKeySize()); + fWriter = RNTupleFileWriter::Recreate(ntupleName, path, RNTupleFileWriter::EContainerFormat::kTFile, options); } ROOT::Experimental::Internal::RPageSinkFile::RPageSinkFile(std::string_view ntupleName, TFile &file, diff --git a/tree/ntuple/v7/test/ntuple_compat.cxx b/tree/ntuple/v7/test/ntuple_compat.cxx index 63a0250f32b1c..d184d0876eb7c 100644 --- a/tree/ntuple/v7/test/ntuple_compat.cxx +++ b/tree/ntuple/v7/test/ntuple_compat.cxx @@ -38,8 +38,8 @@ TEST(RNTupleCompat, FeatureFlag) RFieldDescriptorBuilder::FromField(ROOT::Experimental::RFieldZero()).FieldId(0).MakeDescriptor().Unwrap()); ASSERT_TRUE(static_cast(descBuilder.EnsureValidDescriptor())); - auto writer = RNTupleFileWriter::Recreate("ntpl", fileGuard.GetPath(), 0, EContainerFormat::kTFile, - RNTupleWriteOptions::kDefaultMaxKeySize); + RNTupleWriteOptions options; + auto writer = RNTupleFileWriter::Recreate("ntpl", fileGuard.GetPath(), EContainerFormat::kTFile, options); RNTupleSerializer serializer; auto ctx = serializer.SerializeHeader(nullptr, descBuilder.GetDescriptor()); diff --git a/tree/ntuple/v7/test/ntuple_minifile.cxx b/tree/ntuple/v7/test/ntuple_minifile.cxx index 5bf9ff200cf9f..8ef9b744a0d73 100644 --- a/tree/ntuple/v7/test/ntuple_minifile.cxx +++ b/tree/ntuple/v7/test/ntuple_minifile.cxx @@ -2,6 +2,8 @@ #include #include +using ROOT::Experimental::Internal::RNTupleWriteOptionsManip; + namespace { bool IsEqual(const ROOT::Experimental::RNTuple &a, const ROOT::Experimental::RNTuple &b) { @@ -25,8 +27,8 @@ TEST(MiniFile, Raw) { FileRaii fileGuard("test_ntuple_minifile_raw.ntuple"); - auto writer = RNTupleFileWriter::Recreate("MyNTuple", fileGuard.GetPath(), 0, EContainerFormat::kBare, - RNTupleWriteOptions::kDefaultMaxKeySize); + RNTupleWriteOptions options; + auto writer = RNTupleFileWriter::Recreate("MyNTuple", fileGuard.GetPath(), EContainerFormat::kBare, options); char header = 'h'; char footer = 'f'; char blob = 'b'; @@ -54,8 +56,8 @@ TEST(MiniFile, Stream) { FileRaii fileGuard("test_ntuple_minifile_stream.root"); - auto writer = RNTupleFileWriter::Recreate("MyNTuple", fileGuard.GetPath(), 0, EContainerFormat::kTFile, - RNTupleWriteOptions::kDefaultMaxKeySize); + RNTupleWriteOptions options; + auto writer = RNTupleFileWriter::Recreate("MyNTuple", fileGuard.GetPath(), EContainerFormat::kTFile, options); char header = 'h'; char footer = 'f'; char blob = 'b'; @@ -118,8 +120,8 @@ TEST(MiniFile, SimpleKeys) { FileRaii fileGuard("test_ntuple_minifile_simple_keys.root"); - auto writer = RNTupleFileWriter::Recreate("MyNTuple", fileGuard.GetPath(), 0, EContainerFormat::kTFile, - RNTupleWriteOptions::kDefaultMaxKeySize); + RNTupleWriteOptions options; + auto writer = RNTupleFileWriter::Recreate("MyNTuple", fileGuard.GetPath(), EContainerFormat::kTFile, options); char blob1 = '1'; auto offBlob1 = writer->WriteBlob(&blob1, 1, 1); @@ -358,7 +360,9 @@ TEST(MiniFile, MultiKeyBlob) std::uint64_t blobOffset; { - auto writer = RNTupleFileWriter::Recreate("ntpl", fileGuard.GetPath(), 0, EContainerFormat::kTFile, kMaxKeySize); + RNTupleWriteOptions options; + RNTupleWriteOptionsManip::SetMaxKeySize(options, kMaxKeySize); + auto writer = RNTupleFileWriter::Recreate("ntpl", fileGuard.GetPath(), EContainerFormat::kTFile, options); memset(data.get(), 0x99, dataSize); data[42] = 0x42; data[dataSize - 42] = 0x11; @@ -395,7 +399,9 @@ TEST(MiniFile, MultiKeyBlob_ExactlyMax) std::uint64_t blobOffset; { - auto writer = RNTupleFileWriter::Recreate("ntpl", fileGuard.GetPath(), 0, EContainerFormat::kTFile, kMaxKeySize); + RNTupleWriteOptions options; + RNTupleWriteOptionsManip::SetMaxKeySize(options, kMaxKeySize); + auto writer = RNTupleFileWriter::Recreate("ntpl", fileGuard.GetPath(), EContainerFormat::kTFile, options); memset(data.get(), 0, dataSize); blobOffset = writer->WriteBlob(data.get(), dataSize, dataSize); writer->Commit(); @@ -429,7 +435,9 @@ TEST(MiniFile, MultiKeyBlob_ExactlyTwo) std::uint64_t blobOffset; { - auto writer = RNTupleFileWriter::Recreate("ntpl", fileGuard.GetPath(), 0, EContainerFormat::kTFile, kMaxKeySize); + RNTupleWriteOptions options; + RNTupleWriteOptionsManip::SetMaxKeySize(options, kMaxKeySize); + auto writer = RNTupleFileWriter::Recreate("ntpl", fileGuard.GetPath(), EContainerFormat::kTFile, options); memset(data.get(), 0, dataSize / 2); memset(data.get() + dataSize / 2, 0x99, dataSize / 2); data[42] = 0x42; @@ -473,7 +481,9 @@ TEST(MiniFile, MultiKeyBlob_SmallKey) std::uint64_t blobOffset; { - auto writer = RNTupleFileWriter::Recreate("ntpl", fileGuard.GetPath(), 0, EContainerFormat::kTFile, kMaxKeySize); + RNTupleWriteOptions options; + RNTupleWriteOptionsManip::SetMaxKeySize(options, kMaxKeySize); + auto writer = RNTupleFileWriter::Recreate("ntpl", fileGuard.GetPath(), EContainerFormat::kTFile, options); memset(data.get(), 0x99, dataSize); data[42] = 0x42; data[dataSize - 42] = 0x84; @@ -511,11 +521,13 @@ TEST(MiniFile, MultiKeyBlob_TooManyChunks) FileRaii fileGuard("test_ntuple_minifile_multi_key_blob_small_key.root"); const auto kMaxKeySize = 128; + RNTupleWriteOptions options; + RNTupleWriteOptionsManip::SetMaxKeySize(options, kMaxKeySize); { const auto kOkayDataSize = 1024; const auto data = std::make_unique(kOkayDataSize); - auto writer = RNTupleFileWriter::Recreate("ntpl", fileGuard.GetPath(), 0, EContainerFormat::kTFile, kMaxKeySize); + auto writer = RNTupleFileWriter::Recreate("ntpl", fileGuard.GetPath(), EContainerFormat::kTFile, options); memset(data.get(), 0x99, kOkayDataSize); writer->WriteBlob(data.get(), kOkayDataSize, kOkayDataSize); writer->Commit(); @@ -524,7 +536,7 @@ TEST(MiniFile, MultiKeyBlob_TooManyChunks) { const auto kTooBigDataSize = 5000; const auto data = std::make_unique(kTooBigDataSize); - auto writer = RNTupleFileWriter::Recreate("ntpl", fileGuard.GetPath(), 0, EContainerFormat::kTFile, kMaxKeySize); + auto writer = RNTupleFileWriter::Recreate("ntpl", fileGuard.GetPath(), EContainerFormat::kTFile, options); memset(data.get(), 0x99, kTooBigDataSize); EXPECT_DEATH(writer->WriteBlob(data.get(), kTooBigDataSize, kTooBigDataSize), ""); writer->Commit(); @@ -580,15 +592,13 @@ TEST(MiniFile, Multi) TEST(MiniFile, Failures) { + RNTupleWriteOptions options; // TODO(jblomer): failures should be exceptions - EXPECT_DEATH(RNTupleFileWriter::Recreate("MyNTuple", "/can/not/open", 0, EContainerFormat::kTFile, - RNTupleWriteOptions::kDefaultMaxKeySize), - ".*"); + EXPECT_DEATH(RNTupleFileWriter::Recreate("MyNTuple", "/can/not/open", EContainerFormat::kTFile, options), ".*"); FileRaii fileGuard("test_ntuple_minifile_failures.root"); - auto writer = RNTupleFileWriter::Recreate("MyNTuple", fileGuard.GetPath(), 0, EContainerFormat::kTFile, - RNTupleWriteOptions::kDefaultMaxKeySize); + auto writer = RNTupleFileWriter::Recreate("MyNTuple", fileGuard.GetPath(), EContainerFormat::kTFile, options); char header = 'h'; char footer = 'f'; char blob = 'b'; From a91dbd5d403e82e9562be26d06092f2df180fb87 Mon Sep 17 00:00:00 2001 From: Jonas Hahnfeld Date: Thu, 4 Apr 2024 17:04:46 +0200 Subject: [PATCH 09/37] [ntuple] RNTupleFileWriter: Use open + fdopen This allows more fine-grained control over the flags passed to the system call, which will be required to enable Direct I/O. --- tree/ntuple/v7/src/RMiniFile.cxx | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tree/ntuple/v7/src/RMiniFile.cxx b/tree/ntuple/v7/src/RMiniFile.cxx index e9e816fcda61b..d250e3eee5637 100644 --- a/tree/ntuple/v7/src/RMiniFile.cxx +++ b/tree/ntuple/v7/src/RMiniFile.cxx @@ -39,6 +39,10 @@ #include #include +#ifdef R__LINUX +#include +#endif + #ifndef R__LITTLE_ENDIAN #ifdef R__BYTESWAP // `R__BYTESWAP` is defined in RConfig.hxx for little-endian architectures; undefined otherwise @@ -1503,10 +1507,20 @@ ROOT::Experimental::Internal::RNTupleFileWriter::Recreate(std::string_view ntupl if (idxDirSep != std::string::npos) { fileName.erase(0, idxDirSep + 1); } +#ifdef R__LINUX + int flags = O_WRONLY | O_CREAT | O_TRUNC; +#ifdef O_LARGEFILE + // Add the equivalent flag that is passed by fopen64. + flags |= O_LARGEFILE; +#endif + int fd = open(std::string(path).c_str(), flags, 0666); + FILE *fileStream = fdopen(fd, "wb"); +#else #ifdef R__SEEK64 FILE *fileStream = fopen64(std::string(path.data(), path.size()).c_str(), "wb"); #else FILE *fileStream = fopen(std::string(path.data(), path.size()).c_str(), "wb"); +#endif #endif R__ASSERT(fileStream); // RNTupleFileWriter::RFileSimple does its own buffering, turn off additional buffering from C stdio. From be6add521c1cb56e07fe00943c1f325379678c56 Mon Sep 17 00:00:00 2001 From: Jonas Hahnfeld Date: Thu, 4 Apr 2024 17:04:46 +0200 Subject: [PATCH 10/37] [ntuple] Add option to enable Direct I/O Direct I/O bypasses the OS page cache and allows to reach much higher bandwidths. However, it introduces strict alignment requirements to the offset and size of all write requests, as well as the userspace pointer. --- tree/ntuple/v7/inc/ROOT/RMiniFile.hxx | 2 ++ tree/ntuple/v7/inc/ROOT/RNTupleWriteOptions.hxx | 6 ++++++ tree/ntuple/v7/src/RMiniFile.cxx | 12 +++++++++++- tree/ntuple/v7/test/ntuple_minifile.cxx | 2 ++ 4 files changed, 21 insertions(+), 1 deletion(-) diff --git a/tree/ntuple/v7/inc/ROOT/RMiniFile.hxx b/tree/ntuple/v7/inc/ROOT/RMiniFile.hxx index d800e5a7663b3..f8e5b8c64f69e 100644 --- a/tree/ntuple/v7/inc/ROOT/RMiniFile.hxx +++ b/tree/ntuple/v7/inc/ROOT/RMiniFile.hxx @@ -130,6 +130,8 @@ private: /// For the simplest cases, a C file stream can be used for writing FILE *fFile = nullptr; + /// Whether the C file stream has been opened with Direct I/O, introducing alignment requirements. + bool fDirectIO = false; /// Keeps track of the seek offset std::uint64_t fFilePos = 0; /// Keeps track of the next key offset diff --git a/tree/ntuple/v7/inc/ROOT/RNTupleWriteOptions.hxx b/tree/ntuple/v7/inc/ROOT/RNTupleWriteOptions.hxx index 8b71465973b42..ab212590b721b 100644 --- a/tree/ntuple/v7/inc/ROOT/RNTupleWriteOptions.hxx +++ b/tree/ntuple/v7/inc/ROOT/RNTupleWriteOptions.hxx @@ -76,6 +76,9 @@ protected: /// Whether to use buffered writing (with RPageSinkBuf). This buffers compressed pages in memory, reorders them /// to keep pages of the same column adjacent, and coalesces the writes when committing a cluster. bool fUseBufferedWrite = true; + /// Whether to use Direct I/O for writing. Note that this introduces alignment requirements that may very between + /// filesystems and platforms. + bool fUseDirectIO = false; /// Whether to use implicit multi-threading to compress pages. Only has an effect if buffered writing is turned on. EImplicitMT fUseImplicitMT = EImplicitMT::kDefault; /// If set, 64bit index columns are replaced by 32bit index columns. This limits the cluster size to 512MB @@ -118,6 +121,9 @@ public: bool GetUseBufferedWrite() const { return fUseBufferedWrite; } void SetUseBufferedWrite(bool val) { fUseBufferedWrite = val; } + bool GetUseDirectIO() const { return fUseDirectIO; } + void SetUseDirectIO(bool val) { fUseDirectIO = val; } + EImplicitMT GetUseImplicitMT() const { return fUseImplicitMT; } void SetUseImplicitMT(EImplicitMT val) { fUseImplicitMT = val; } diff --git a/tree/ntuple/v7/src/RMiniFile.cxx b/tree/ntuple/v7/src/RMiniFile.cxx index d250e3eee5637..348cdc35aadfd 100644 --- a/tree/ntuple/v7/src/RMiniFile.cxx +++ b/tree/ntuple/v7/src/RMiniFile.cxx @@ -1336,7 +1336,7 @@ int FSeek64(FILE *stream, std::int64_t offset, int origin) void ROOT::Experimental::Internal::RNTupleFileWriter::RFileSimple::Flush() { - // Write the last partially filled block. + // Write the last partially filled block, which may still need appropriate alignment for Direct I/O. // If it is the first block, get the updated header block. if (fBlockOffset == 0) { std::size_t headerBlockSize = kHeaderBlockSize; @@ -1352,6 +1352,12 @@ void ROOT::Experimental::Internal::RNTupleFileWriter::RFileSimple::Flush() std::size_t lastBlockSize = fFilePos - fBlockOffset; R__ASSERT(lastBlockSize <= kBlockSize); + if (fDirectIO) { + // Round up to a multiple of kBlockAlign. + lastBlockSize += kBlockAlign - 1; + lastBlockSize = (lastBlockSize / kBlockAlign) * kBlockAlign; + R__ASSERT(lastBlockSize <= kBlockSize); + } retval = fwrite(fBlock, 1, lastBlockSize, fFile); if (retval != lastBlockSize) throw RException(R__FAIL(std::string("write failed: ") + strerror(errno))); @@ -1513,6 +1519,9 @@ ROOT::Experimental::Internal::RNTupleFileWriter::Recreate(std::string_view ntupl // Add the equivalent flag that is passed by fopen64. flags |= O_LARGEFILE; #endif + if (options.GetUseDirectIO()) { + flags |= O_DIRECT; + } int fd = open(std::string(path).c_str(), flags, 0666); FILE *fileStream = fdopen(fd, "wb"); #else @@ -1528,6 +1537,7 @@ ROOT::Experimental::Internal::RNTupleFileWriter::Recreate(std::string_view ntupl auto writer = std::unique_ptr(new RNTupleFileWriter(ntupleName, options.GetMaxKeySize())); writer->fFileSimple.fFile = fileStream; + writer->fFileSimple.fDirectIO = options.GetUseDirectIO(); writer->fFileName = fileName; int defaultCompression = options.GetCompression(); diff --git a/tree/ntuple/v7/test/ntuple_minifile.cxx b/tree/ntuple/v7/test/ntuple_minifile.cxx index 8ef9b744a0d73..c42cff41168d6 100644 --- a/tree/ntuple/v7/test/ntuple_minifile.cxx +++ b/tree/ntuple/v7/test/ntuple_minifile.cxx @@ -121,6 +121,8 @@ TEST(MiniFile, SimpleKeys) FileRaii fileGuard("test_ntuple_minifile_simple_keys.root"); RNTupleWriteOptions options; + // We check the file size at the end, so Direct I/O alignment requirements must not introduce padding. + options.SetUseDirectIO(false); auto writer = RNTupleFileWriter::Recreate("MyNTuple", fileGuard.GetPath(), EContainerFormat::kTFile, options); char blob1 = '1'; From 51fb7a9a58fbe29c0c60d327d0923d64330ff648 Mon Sep 17 00:00:00 2001 From: Jonas Hahnfeld Date: Wed, 31 Jul 2024 17:04:26 +0200 Subject: [PATCH 11/37] [ntuple] Check return value of fflush --- tree/ntuple/v7/src/RMiniFile.cxx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tree/ntuple/v7/src/RMiniFile.cxx b/tree/ntuple/v7/src/RMiniFile.cxx index 348cdc35aadfd..c8fe7f08e6a45 100644 --- a/tree/ntuple/v7/src/RMiniFile.cxx +++ b/tree/ntuple/v7/src/RMiniFile.cxx @@ -1373,7 +1373,9 @@ void ROOT::Experimental::Internal::RNTupleFileWriter::RFileSimple::Flush() throw RException(R__FAIL(std::string("write failed: ") + strerror(errno))); } - fflush(fFile); + retval = fflush(fFile); + if (retval) + throw RException(R__FAIL(std::string("Flush failed: ") + strerror(errno))); } void ROOT::Experimental::Internal::RNTupleFileWriter::RFileSimple::Write(const void *buffer, size_t nbytes, From 6818e10972bd702e07fca33ec879eed678fa5752 Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Tue, 23 Jul 2024 23:33:11 +0200 Subject: [PATCH 12/37] [ntuple] make RField::fColumnRepresentative(s) a vector --- tree/ntuple/v7/inc/ROOT/RField.hxx | 9 +++++---- tree/ntuple/v7/src/RField.cxx | 20 +++++++++++--------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/tree/ntuple/v7/inc/ROOT/RField.hxx b/tree/ntuple/v7/inc/ROOT/RField.hxx index 7bcfd28cb93b7..7201162fd39e4 100644 --- a/tree/ntuple/v7/inc/ROOT/RField.hxx +++ b/tree/ntuple/v7/inc/ROOT/RField.hxx @@ -415,9 +415,10 @@ protected: /// TClass checksum cached from the descriptor after a call to `ConnectPageSource()`. Only set /// for classes with dictionaries. std::uint32_t fOnDiskTypeChecksum = 0; - /// Points into the static vector GetColumnRepresentations().GetSerializationTypes() when SetColumnRepresentative - /// is called. Otherwise GetColumnRepresentative returns the default representation. - const ColumnRepresentation_t *fColumnRepresentative = nullptr; + /// Pointers into the static vector GetColumnRepresentations().GetSerializationTypes() when + /// SetColumnRepresentative(s) is called. Otherwise (if empty) GetColumnRepresentative() returns the + /// default representation. + std::vector> fColumnRepresentatives; /// Helpers for generating columns. We use the fact that most fields have the same C++/memory types /// for all their column representations. @@ -732,7 +733,7 @@ public: /// an exception is thrown void SetColumnRepresentative(const ColumnRepresentation_t &representative); /// Whether or not an explicit column representative was set - bool HasDefaultColumnRepresentative() const { return fColumnRepresentative == nullptr; } + bool HasDefaultColumnRepresentative() const { return fColumnRepresentatives.empty(); } /// Indicates an evolution of the mapping scheme from C++ type to columns virtual std::uint32_t GetFieldVersion() const { return 0; } diff --git a/tree/ntuple/v7/src/RField.cxx b/tree/ntuple/v7/src/RField.cxx index a9a7022dc9221..e9012071c5b85 100644 --- a/tree/ntuple/v7/src/RField.cxx +++ b/tree/ntuple/v7/src/RField.cxx @@ -877,8 +877,8 @@ std::unique_ptr ROOT::Experimental::RFieldBase:: clone->fTypeAlias = fTypeAlias; clone->fOnDiskId = fOnDiskId; clone->fDescription = fDescription; - // We can just copy the pointer because fColumnRepresentative points into a static structure - clone->fColumnRepresentative = fColumnRepresentative; + // We can just copy the references because fColumnRepresentatives point into a static structure + clone->fColumnRepresentatives = fColumnRepresentatives; return clone; } @@ -1001,8 +1001,8 @@ void ROOT::Experimental::RFieldBase::SetOnDiskId(DescriptorId_t id) const ROOT::Experimental::RFieldBase::ColumnRepresentation_t & ROOT::Experimental::RFieldBase::GetColumnRepresentative() const { - if (fColumnRepresentative) - return *fColumnRepresentative; + if (!fColumnRepresentatives.empty()) + return fColumnRepresentatives[0].get(); return GetColumnRepresentations().GetSerializationDefault(); } @@ -1014,7 +1014,7 @@ void ROOT::Experimental::RFieldBase::SetColumnRepresentative(const ColumnReprese auto itRepresentative = std::find(validTypes.begin(), validTypes.end(), representative); if (itRepresentative == std::end(validTypes)) throw RException(R__FAIL("invalid column representative")); - fColumnRepresentative = &(*itRepresentative); + fColumnRepresentatives = {*itRepresentative}; } const ROOT::Experimental::RFieldBase::ColumnRepresentation_t & @@ -1122,7 +1122,7 @@ void ROOT::Experimental::RFieldBase::ConnectPageSource(Internal::RPageSource &pa if (fState != EState::kUnconnected) throw RException(R__FAIL("invalid attempt to connect an already connected field to a page source")); - if (fColumnRepresentative) + if (!fColumnRepresentatives.empty()) throw RException(R__FAIL("fixed column representative only valid when connecting to a page sink")); if (!fDescription.empty()) throw RException(R__FAIL("setting description only valid when connecting to a page sink")); @@ -1144,10 +1144,12 @@ void ROOT::Experimental::RFieldBase::ConnectPageSource(Internal::RPageSource &pa onDiskColumnTypes.emplace_back(c->GetType()); } for (const auto &t : GetColumnRepresentations().GetDeserializationTypes()) { - if (t == onDiskColumnTypes) - fColumnRepresentative = &t; + if (t == onDiskColumnTypes) { + fColumnRepresentatives = {t}; + break; + } } - R__ASSERT(fColumnRepresentative); + R__ASSERT(!fColumnRepresentatives.empty()); if (fOnDiskId != kInvalidDescriptorId) { const auto &fieldDesc = desc.GetFieldDescriptor(fOnDiskId); fOnDiskTypeVersion = fieldDesc.GetTypeVersion(); From 302e32ae2a391fb8b25f3682c38dd2fcf331ed48 Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Wed, 24 Jul 2024 15:10:11 +0200 Subject: [PATCH 13/37] [ntuple] RColumnRepresentations::TypesList_t --> Selection_t --- tree/ntuple/v7/inc/ROOT/RField.hxx | 14 ++++++++------ tree/ntuple/v7/src/RField.cxx | 2 +- tree/ntuple/v7/test/ntuple_types.cxx | 4 ++-- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/tree/ntuple/v7/inc/ROOT/RField.hxx b/tree/ntuple/v7/inc/ROOT/RField.hxx index 7201162fd39e4..4e5051c7b5e23 100644 --- a/tree/ntuple/v7/inc/ROOT/RField.hxx +++ b/tree/ntuple/v7/inc/ROOT/RField.hxx @@ -172,20 +172,22 @@ public: /// i.e. for the example above, the unpacking of 32bit ints to 64bit pages must be implemented in RColumnElement.hxx class RColumnRepresentations { public: - using TypesList_t = std::vector; + /// A list of column representations + using Selection_t = std::vector; + RColumnRepresentations(); - RColumnRepresentations(const TypesList_t &serializationTypes, const TypesList_t &deserializationExtraTypes); + RColumnRepresentations(const Selection_t &serializationTypes, const Selection_t &deserializationExtraTypes); /// The first column list from fSerializationTypes is the default for writing. const ColumnRepresentation_t &GetSerializationDefault() const { return fSerializationTypes[0]; } - const TypesList_t &GetSerializationTypes() const { return fSerializationTypes; } - const TypesList_t &GetDeserializationTypes() const { return fDeserializationTypes; } + const Selection_t &GetSerializationTypes() const { return fSerializationTypes; } + const Selection_t &GetDeserializationTypes() const { return fDeserializationTypes; } private: - TypesList_t fSerializationTypes; + Selection_t fSerializationTypes; /// The union of the serialization types and the deserialization extra types. Duplicates the serialization types /// list but the benenfit is that GetDeserializationTypes does not need to compile the list. - TypesList_t fDeserializationTypes; + Selection_t fDeserializationTypes; }; // class RColumnRepresentations /// Points to an object with RNTuple I/O support and keeps a pointer to the corresponding field. diff --git a/tree/ntuple/v7/src/RField.cxx b/tree/ntuple/v7/src/RField.cxx index e9012071c5b85..8d48d0b2b5b94 100644 --- a/tree/ntuple/v7/src/RField.cxx +++ b/tree/ntuple/v7/src/RField.cxx @@ -427,7 +427,7 @@ ROOT::Experimental::RFieldBase::RColumnRepresentations::RColumnRepresentations() } ROOT::Experimental::RFieldBase::RColumnRepresentations::RColumnRepresentations( - const TypesList_t &serializationTypes, const TypesList_t &deserializationExtraTypes) + const Selection_t &serializationTypes, const Selection_t &deserializationExtraTypes) : fSerializationTypes(serializationTypes), fDeserializationTypes(serializationTypes) { fDeserializationTypes.insert(fDeserializationTypes.end(), diff --git a/tree/ntuple/v7/test/ntuple_types.cxx b/tree/ntuple/v7/test/ntuple_types.cxx index ce6797542df36..40fe1eb2222f3 100644 --- a/tree/ntuple/v7/test/ntuple_types.cxx +++ b/tree/ntuple/v7/test/ntuple_types.cxx @@ -2112,13 +2112,13 @@ TEST(RNTuple, RColumnRepresentations) using RColumnRepresentations = ROOT::Experimental::RFieldBase::RColumnRepresentations; RColumnRepresentations colReps1; EXPECT_EQ(RFieldBase::ColumnRepresentation_t(), colReps1.GetSerializationDefault()); - EXPECT_EQ(RColumnRepresentations::TypesList_t{RFieldBase::ColumnRepresentation_t()}, + EXPECT_EQ(RColumnRepresentations::Selection_t{RFieldBase::ColumnRepresentation_t()}, colReps1.GetDeserializationTypes()); RColumnRepresentations colReps2({{EColumnType::kReal64}, {EColumnType::kSplitReal64}}, {{EColumnType::kReal32}, {EColumnType::kReal16}}); EXPECT_EQ(RFieldBase::ColumnRepresentation_t({EColumnType::kReal64}), colReps2.GetSerializationDefault()); - EXPECT_EQ(RColumnRepresentations::TypesList_t( + EXPECT_EQ(RColumnRepresentations::Selection_t( {{EColumnType::kReal64}, {EColumnType::kSplitReal64}, {EColumnType::kReal32}, {EColumnType::kReal16}}), colReps2.GetDeserializationTypes()); } From 92adf61aa97457e074f04ea7ad8e8347fe19ca71 Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Wed, 24 Jul 2024 15:22:06 +0200 Subject: [PATCH 14/37] [ntuple] vector interface for SetColumnRepresentative(s) --- tree/ntuple/v7/inc/ROOT/RField.hxx | 6 +-- tree/ntuple/v7/src/RField.cxx | 22 ++++++---- tree/ntuple/v7/test/ntuple_packing.cxx | 2 +- tree/ntuple/v7/test/ntuple_types.cxx | 44 ++++++++++---------- tree/ntupleutil/v7/test/ntuple_importer.cxx | 2 +- tree/ntupleutil/v7/test/ntuple_inspector.cxx | 4 +- 6 files changed, 42 insertions(+), 38 deletions(-) diff --git a/tree/ntuple/v7/inc/ROOT/RField.hxx b/tree/ntuple/v7/inc/ROOT/RField.hxx index 4e5051c7b5e23..4cb2693668d2d 100644 --- a/tree/ntuple/v7/inc/ROOT/RField.hxx +++ b/tree/ntuple/v7/inc/ROOT/RField.hxx @@ -733,7 +733,7 @@ public: /// Fixes a column representative. This can only be done _before_ connecting the field to a page sink. /// Otherwise, or if the provided representation is not in the list of GetColumnRepresentations, /// an exception is thrown - void SetColumnRepresentative(const ColumnRepresentation_t &representative); + void SetColumnRepresentatives(const RColumnRepresentations::Selection_t &representatives); /// Whether or not an explicit column representative was set bool HasDefaultColumnRepresentative() const { return fColumnRepresentatives.empty(); } @@ -1538,8 +1538,8 @@ public: RNullableField &operator=(RNullableField &&other) = default; ~RNullableField() override = default; - void SetDense() { SetColumnRepresentative({EColumnType::kBit}); } - void SetSparse() { SetColumnRepresentative({EColumnType::kSplitIndex64}); } + void SetDense() { SetColumnRepresentatives({{EColumnType::kBit}}); } + void SetSparse() { SetColumnRepresentatives({{EColumnType::kSplitIndex64}}); } void AcceptVisitor(Detail::RFieldVisitor &visitor) const final; }; diff --git a/tree/ntuple/v7/src/RField.cxx b/tree/ntuple/v7/src/RField.cxx index 8d48d0b2b5b94..361fe336fd761 100644 --- a/tree/ntuple/v7/src/RField.cxx +++ b/tree/ntuple/v7/src/RField.cxx @@ -1006,15 +1006,19 @@ ROOT::Experimental::RFieldBase::GetColumnRepresentative() const return GetColumnRepresentations().GetSerializationDefault(); } -void ROOT::Experimental::RFieldBase::SetColumnRepresentative(const ColumnRepresentation_t &representative) +void ROOT::Experimental::RFieldBase::SetColumnRepresentatives( + const RColumnRepresentations::Selection_t &representatives) { if (fState != EState::kUnconnected) throw RException(R__FAIL("cannot set column representative once field is connected")); const auto &validTypes = GetColumnRepresentations().GetSerializationTypes(); - auto itRepresentative = std::find(validTypes.begin(), validTypes.end(), representative); - if (itRepresentative == std::end(validTypes)) - throw RException(R__FAIL("invalid column representative")); - fColumnRepresentatives = {*itRepresentative}; + fColumnRepresentatives.clear(); + for (const auto &r : representatives) { + auto itRepresentative = std::find(validTypes.begin(), validTypes.end(), r); + if (itRepresentative == std::end(validTypes)) + throw RException(R__FAIL("invalid column representative")); + fColumnRepresentatives.emplace_back(*itRepresentative); + } } const ROOT::Experimental::RFieldBase::ColumnRepresentation_t & @@ -1071,7 +1075,7 @@ void ROOT::Experimental::RFieldBase::AutoAdjustColumnTypes(const RNTupleWriteOpt default: break; } } - SetColumnRepresentative(rep); + SetColumnRepresentatives({rep}); } if (options.GetHasSmallClusters()) { @@ -1083,11 +1087,11 @@ void ROOT::Experimental::RFieldBase::AutoAdjustColumnTypes(const RNTupleWriteOpt default: break; } } - SetColumnRepresentative(rep); + SetColumnRepresentatives({rep}); } if (fTypeAlias == "Double32_t") - SetColumnRepresentative({EColumnType::kSplitReal32}); + SetColumnRepresentatives({{EColumnType::kSplitReal32}}); } void ROOT::Experimental::RFieldBase::ConnectPageSink(Internal::RPageSink &pageSink, NTupleSize_t firstEntry) @@ -1325,7 +1329,7 @@ void ROOT::Experimental::RField::AcceptVisitor(Detail::RFieldVisitor &vis void ROOT::Experimental::RField::SetHalfPrecision() { - SetColumnRepresentative({EColumnType::kReal16}); + SetColumnRepresentatives({{EColumnType::kReal16}}); } //------------------------------------------------------------------------------ diff --git a/tree/ntuple/v7/test/ntuple_packing.cxx b/tree/ntuple/v7/test/ntuple_packing.cxx index cd19707477e7f..de64dae377e39 100644 --- a/tree/ntuple/v7/test/ntuple_packing.cxx +++ b/tree/ntuple/v7/test/ntuple_packing.cxx @@ -229,7 +229,7 @@ template static void AddField(RNTupleModel &model, const std::string &fieldName) { auto fld = std::make_unique>(fieldName); - fld->SetColumnRepresentative({ColumnT}); + fld->SetColumnRepresentatives({{ColumnT}}); model.AddField(std::move(fld)); } diff --git a/tree/ntuple/v7/test/ntuple_types.cxx b/tree/ntuple/v7/test/ntuple_types.cxx index 40fe1eb2222f3..1e50bc6dd316c 100644 --- a/tree/ntuple/v7/test/ntuple_types.cxx +++ b/tree/ntuple/v7/test/ntuple_types.cxx @@ -698,19 +698,19 @@ TEST(RNTuple, Int64) auto model = RNTupleModel::Create(); auto f1 = std::make_unique>("i1"); - f1->SetColumnRepresentative({ROOT::Experimental::EColumnType::kInt64}); + f1->SetColumnRepresentatives({{ROOT::Experimental::EColumnType::kInt64}}); model->AddField(std::move(f1)); auto f2 = std::make_unique>("i2"); - f2->SetColumnRepresentative({ROOT::Experimental::EColumnType::kSplitInt64}); + f2->SetColumnRepresentatives({{ROOT::Experimental::EColumnType::kSplitInt64}}); model->AddField(std::move(f2)); auto f3 = std::make_unique>("i3"); - f3->SetColumnRepresentative({ROOT::Experimental::EColumnType::kUInt64}); + f3->SetColumnRepresentatives({{ROOT::Experimental::EColumnType::kUInt64}}); model->AddField(std::move(f3)); auto f4 = std::make_unique>("i4"); - f4->SetColumnRepresentative({ROOT::Experimental::EColumnType::kSplitUInt64}); + f4->SetColumnRepresentatives({{ROOT::Experimental::EColumnType::kSplitUInt64}}); model->AddField(std::move(f4)); { @@ -768,19 +768,19 @@ TEST(RNTuple, Int32) auto model = RNTupleModel::Create(); auto f1 = std::make_unique>("i1"); - f1->SetColumnRepresentative({ROOT::Experimental::EColumnType::kInt32}); + f1->SetColumnRepresentatives({{ROOT::Experimental::EColumnType::kInt32}}); model->AddField(std::move(f1)); auto f2 = std::make_unique>("i2"); - f2->SetColumnRepresentative({ROOT::Experimental::EColumnType::kSplitInt32}); + f2->SetColumnRepresentatives({{ROOT::Experimental::EColumnType::kSplitInt32}}); model->AddField(std::move(f2)); auto f3 = std::make_unique>("i3"); - f3->SetColumnRepresentative({ROOT::Experimental::EColumnType::kUInt32}); + f3->SetColumnRepresentatives({{ROOT::Experimental::EColumnType::kUInt32}}); model->AddField(std::move(f3)); auto f4 = std::make_unique>("i4"); - f4->SetColumnRepresentative({ROOT::Experimental::EColumnType::kSplitUInt32}); + f4->SetColumnRepresentatives({{ROOT::Experimental::EColumnType::kSplitUInt32}}); model->AddField(std::move(f4)); { @@ -838,19 +838,19 @@ TEST(RNTuple, Int16) auto model = RNTupleModel::Create(); auto f1 = std::make_unique>("i1"); - f1->SetColumnRepresentative({ROOT::Experimental::EColumnType::kInt16}); + f1->SetColumnRepresentatives({{ROOT::Experimental::EColumnType::kInt16}}); model->AddField(std::move(f1)); auto f2 = std::make_unique>("i2"); - f2->SetColumnRepresentative({ROOT::Experimental::EColumnType::kSplitInt16}); + f2->SetColumnRepresentatives({{ROOT::Experimental::EColumnType::kSplitInt16}}); model->AddField(std::move(f2)); auto f3 = std::make_unique>("i3"); - f3->SetColumnRepresentative({ROOT::Experimental::EColumnType::kUInt16}); + f3->SetColumnRepresentatives({{ROOT::Experimental::EColumnType::kUInt16}}); model->AddField(std::move(f3)); auto f4 = std::make_unique>("i4"); - f4->SetColumnRepresentative({ROOT::Experimental::EColumnType::kSplitUInt16}); + f4->SetColumnRepresentatives({{ROOT::Experimental::EColumnType::kSplitUInt16}}); model->AddField(std::move(f4)); { @@ -1057,11 +1057,11 @@ TEST(RNTuple, Double) auto model = RNTupleModel::Create(); auto f1 = std::make_unique>("d1"); - f1->SetColumnRepresentative({ROOT::Experimental::EColumnType::kReal64}); + f1->SetColumnRepresentatives({{ROOT::Experimental::EColumnType::kReal64}}); model->AddField(std::move(f1)); auto f2 = std::make_unique>("d2"); - f2->SetColumnRepresentative({ROOT::Experimental::EColumnType::kSplitReal64}); + f2->SetColumnRepresentatives({{ROOT::Experimental::EColumnType::kSplitReal64}}); model->AddField(std::move(f2)); { @@ -1090,11 +1090,11 @@ TEST(RNTuple, Float) auto model = RNTupleModel::Create(); auto f1 = std::make_unique>("f1"); - f1->SetColumnRepresentative({ROOT::Experimental::EColumnType::kReal32}); + f1->SetColumnRepresentatives({{ROOT::Experimental::EColumnType::kReal32}}); model->AddField(std::move(f1)); auto f2 = std::make_unique>("f2"); - f2->SetColumnRepresentative({ROOT::Experimental::EColumnType::kSplitReal32}); + f2->SetColumnRepresentatives({{ROOT::Experimental::EColumnType::kSplitReal32}}); model->AddField(std::move(f2)); { @@ -1482,13 +1482,13 @@ TEST(RNTuple, Casting) FileRaii fileGuard("test_ntuple_casting.root"); auto fldI1 = RFieldBase::Create("i1", "std::int32_t").Unwrap(); - fldI1->SetColumnRepresentative({EColumnType::kInt32}); + fldI1->SetColumnRepresentatives({{EColumnType::kInt32}}); auto fldI2 = RFieldBase::Create("i2", "std::int32_t").Unwrap(); - fldI2->SetColumnRepresentative({EColumnType::kSplitInt32}); + fldI2->SetColumnRepresentatives({{EColumnType::kSplitInt32}}); auto fldF = ROOT::Experimental::RFieldBase::Create("F", "float").Unwrap(); - fldF->SetColumnRepresentative({EColumnType::kReal32}); + fldF->SetColumnRepresentatives({{EColumnType::kReal32}}); try { - fldF->SetColumnRepresentative({EColumnType::kBit}); + fldF->SetColumnRepresentatives({{EColumnType::kBit}}); } catch (const RException &err) { EXPECT_THAT(err.what(), testing::HasSubstr("invalid column representative")); } @@ -1507,7 +1507,7 @@ TEST(RNTuple, Casting) try { auto model = RNTupleModel::Create(); auto f = ROOT::Experimental::RFieldBase::Create("i1", "std::int32_t").Unwrap(); - f->SetColumnRepresentative({EColumnType::kInt32}); + f->SetColumnRepresentatives({{EColumnType::kInt32}}); model->AddField(std::move(f)); auto reader = RNTupleReader::Open(std::move(model), "ntuple", fileGuard.GetPath()); FAIL() << "should not be able fix column representation when model is connected to a page source"; @@ -1592,7 +1592,7 @@ TEST(RNTuple, Double32) FileRaii fileGuard("test_ntuple_double32.root"); auto fldD1 = RFieldBase::Create("d1", "double").Unwrap(); - fldD1->SetColumnRepresentative({EColumnType::kReal32}); + fldD1->SetColumnRepresentatives({{EColumnType::kReal32}}); auto fldD2 = RFieldBase::Create("d2", "Double32_t").Unwrap(); EXPECT_EQ("Double32_t", fldD2->GetTypeAlias()); diff --git a/tree/ntupleutil/v7/test/ntuple_importer.cxx b/tree/ntupleutil/v7/test/ntuple_importer.cxx index cd72a16eec19e..243f04a65b026 100644 --- a/tree/ntupleutil/v7/test/ntuple_importer.cxx +++ b/tree/ntupleutil/v7/test/ntuple_importer.cxx @@ -239,7 +239,7 @@ TEST(RNTupleImporter, FieldModifier) auto fnLowPrecisionFloatModifier = [](RFieldBase &field) { if (field.GetFieldName() == "a") - field.SetColumnRepresentative({EColumnType::kReal16}); + field.SetColumnRepresentatives({{EColumnType::kReal16}}); }; auto importer = RNTupleImporter::Create(fileGuard.GetPath(), "tree", fileGuard.GetPath()); diff --git a/tree/ntupleutil/v7/test/ntuple_inspector.cxx b/tree/ntupleutil/v7/test/ntuple_inspector.cxx index 28d6b5a0ec512..7c36efcb279f1 100644 --- a/tree/ntupleutil/v7/test/ntuple_inspector.cxx +++ b/tree/ntupleutil/v7/test/ntuple_inspector.cxx @@ -297,11 +297,11 @@ TEST(RNTupleInspector, ColumnInfoUncompressed) auto model = RNTupleModel::Create(); auto int32fld = std::make_unique>("int32"); - int32fld->SetColumnRepresentative({EColumnType::kInt32}); + int32fld->SetColumnRepresentatives({{EColumnType::kInt32}}); model->AddField(std::move(int32fld)); auto splitReal64fld = std::make_unique>("splitReal64"); - splitReal64fld->SetColumnRepresentative({EColumnType::kSplitReal64}); + splitReal64fld->SetColumnRepresentatives({{EColumnType::kSplitReal64}}); model->AddField(std::move(splitReal64fld)); auto writeOptions = RNTupleWriteOptions(); From 2bc8023f5971d0c4b36727da5652024710d82f44 Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Sat, 27 Jul 2024 07:16:20 +0200 Subject: [PATCH 15/37] [ntuple] vector interface for GetColumnRepresentative(s) --- tree/ntuple/v7/inc/ROOT/RField.hxx | 13 +++++++---- tree/ntuple/v7/src/RField.cxx | 26 ++++++++++++++------- tree/ntuple/v7/src/RNTupleModel.cxx | 4 +++- tree/ntuple/v7/test/ntuple_packing.cxx | 2 +- tree/ntuple/v7/test/ntuple_types.cxx | 14 +++++------ tree/ntupleutil/v7/test/ntuple_importer.cxx | 4 ++-- 6 files changed, 39 insertions(+), 24 deletions(-) diff --git a/tree/ntuple/v7/inc/ROOT/RField.hxx b/tree/ntuple/v7/inc/ROOT/RField.hxx index 4cb2693668d2d..9f00a29b45431 100644 --- a/tree/ntuple/v7/inc/ROOT/RField.hxx +++ b/tree/ntuple/v7/inc/ROOT/RField.hxx @@ -418,8 +418,8 @@ protected: /// for classes with dictionaries. std::uint32_t fOnDiskTypeChecksum = 0; /// Pointers into the static vector GetColumnRepresentations().GetSerializationTypes() when - /// SetColumnRepresentative(s) is called. Otherwise (if empty) GetColumnRepresentative() returns the - /// default representation. + /// SetColumnRepresentatives is called. Otherwise (if empty) GetColumnRepresentatives() returns a vector + /// with a single element, the default representation. std::vector> fColumnRepresentatives; /// Helpers for generating columns. We use the fact that most fields have the same C++/memory types @@ -438,7 +438,12 @@ protected: template void GenerateColumnsImpl() { - GenerateColumnsImpl<0, ColumnCppTs...>(GetColumnRepresentative()); + if (fColumnRepresentatives.empty()) { + GenerateColumnsImpl<0, ColumnCppTs...>(GetColumnRepresentations().GetSerializationDefault()); + } else { + // TODO(jblomer): generate columns for all of the available representations + GenerateColumnsImpl<0, ColumnCppTs...>(fColumnRepresentatives[0].get()); + } } /// For reading, use the on-disk column list @@ -729,7 +734,7 @@ public: void SetOnDiskId(DescriptorId_t id); /// Returns the fColumnRepresentative pointee or, if unset, the field's default representative - const ColumnRepresentation_t &GetColumnRepresentative() const; + RColumnRepresentations::Selection_t GetColumnRepresentatives() const; /// Fixes a column representative. This can only be done _before_ connecting the field to a page sink. /// Otherwise, or if the provided representation is not in the list of GetColumnRepresentations, /// an exception is thrown diff --git a/tree/ntuple/v7/src/RField.cxx b/tree/ntuple/v7/src/RField.cxx index 361fe336fd761..570398cb6582d 100644 --- a/tree/ntuple/v7/src/RField.cxx +++ b/tree/ntuple/v7/src/RField.cxx @@ -998,12 +998,19 @@ void ROOT::Experimental::RFieldBase::SetOnDiskId(DescriptorId_t id) fOnDiskId = id; } -const ROOT::Experimental::RFieldBase::ColumnRepresentation_t & -ROOT::Experimental::RFieldBase::GetColumnRepresentative() const +ROOT::Experimental::RFieldBase::RColumnRepresentations::Selection_t +ROOT::Experimental::RFieldBase::GetColumnRepresentatives() const { - if (!fColumnRepresentatives.empty()) - return fColumnRepresentatives[0].get(); - return GetColumnRepresentations().GetSerializationDefault(); + if (fColumnRepresentatives.empty()) { + return {GetColumnRepresentations().GetSerializationDefault()}; + } + + RColumnRepresentations::Selection_t result; + result.reserve(fColumnRepresentatives.size()); + for (const auto &r : fColumnRepresentatives) { + result.emplace_back(r.get()); + } + return result; } void ROOT::Experimental::RFieldBase::SetColumnRepresentatives( @@ -1062,7 +1069,7 @@ void ROOT::Experimental::RFieldBase::RemoveReadCallback(size_t idx) void ROOT::Experimental::RFieldBase::AutoAdjustColumnTypes(const RNTupleWriteOptions &options) { if ((options.GetCompression() == 0) && HasDefaultColumnRepresentative()) { - ColumnRepresentation_t rep = GetColumnRepresentative(); + ColumnRepresentation_t rep = GetColumnRepresentations().GetSerializationDefault(); for (auto &colType : rep) { switch (colType) { case EColumnType::kSplitIndex64: colType = EColumnType::kIndex64; break; @@ -1079,7 +1086,7 @@ void ROOT::Experimental::RFieldBase::AutoAdjustColumnTypes(const RNTupleWriteOpt } if (options.GetHasSmallClusters()) { - ColumnRepresentation_t rep = GetColumnRepresentative(); + ColumnRepresentation_t rep = GetColumnRepresentations().GetSerializationDefault(); for (auto &colType : rep) { switch (colType) { case EColumnType::kSplitIndex64: colType = EColumnType::kSplitIndex32; break; @@ -3383,11 +3390,12 @@ ROOT::Experimental::RNullableField::GetColumnRepresentations() const void ROOT::Experimental::RNullableField::GenerateColumns() { - if (GetColumnRepresentative()[0] == EColumnType::kBit) { + // TODO(jblomer): handle all column representatives + if (GetColumnRepresentatives()[0][0] == EColumnType::kBit) { fDefaultItemValue = std::make_unique(fSubFields[0]->CreateValue()); fColumns.emplace_back(Internal::RColumn::Create(EColumnType::kBit, 0)); } else { - fColumns.emplace_back(Internal::RColumn::Create(GetColumnRepresentative()[0], 0)); + fColumns.emplace_back(Internal::RColumn::Create(GetColumnRepresentatives()[0][0], 0)); } } diff --git a/tree/ntuple/v7/src/RNTupleModel.cxx b/tree/ntuple/v7/src/RNTupleModel.cxx index d245906ad4e71..747e271bbb64e 100644 --- a/tree/ntuple/v7/src/RNTupleModel.cxx +++ b/tree/ntuple/v7/src/RNTupleModel.cxx @@ -477,7 +477,9 @@ std::size_t ROOT::Experimental::RNTupleModel::EstimateWriteMemoryUsage(const RNT std::size_t nColumns = 0; for (auto &&field : *fFieldZero) { - nColumns += field.GetColumnRepresentative().size(); + for (const auto &r : field.GetColumnRepresentatives()) { + nColumns += r.size(); + } } const std::size_t pageBuffersPerModel = nColumns * pageBufferPerColumn; bytes += pageBuffersPerModel; diff --git a/tree/ntuple/v7/test/ntuple_packing.cxx b/tree/ntuple/v7/test/ntuple_packing.cxx index de64dae377e39..510b81464dcaa 100644 --- a/tree/ntuple/v7/test/ntuple_packing.cxx +++ b/tree/ntuple/v7/test/ntuple_packing.cxx @@ -353,7 +353,7 @@ TEST(Packing, OnDiskEncoding) EXPECT_EQ(memcmp(sealedPage.GetBuffer(), expIndex64, sizeof(expIndex64)), 0); auto reader = RNTupleReader::Open("ntuple", fileGuard.GetPath()); - EXPECT_EQ(EColumnType::kIndex64, reader->GetModel().GetField("str").GetColumnRepresentative()[0]); + EXPECT_EQ(EColumnType::kIndex64, reader->GetModel().GetField("str").GetColumnRepresentatives()[0][0]); EXPECT_EQ(2u, reader->GetNEntries()); auto viewStr = reader->GetView("str"); EXPECT_EQ(std::string("abc"), viewStr(0)); diff --git a/tree/ntuple/v7/test/ntuple_types.cxx b/tree/ntuple/v7/test/ntuple_types.cxx index 1e50bc6dd316c..933a052acf963 100644 --- a/tree/ntuple/v7/test/ntuple_types.cxx +++ b/tree/ntuple/v7/test/ntuple_types.cxx @@ -1234,13 +1234,13 @@ TYPED_TEST(UniquePtr, Basics) auto writer = RNTupleWriter::Recreate(std::move(model), "ntuple", fileGuard.GetPath()); if constexpr (std::is_same_v) { - EXPECT_EQ(EColumnType::kSplitIndex64, writer->GetModel().GetField("PBool").GetColumnRepresentative()[0]); + EXPECT_EQ(EColumnType::kSplitIndex64, writer->GetModel().GetField("PBool").GetColumnRepresentatives()[0][0]); } if constexpr (std::is_same_v) { - EXPECT_EQ(EColumnType::kSplitIndex64, writer->GetModel().GetField("PBool").GetColumnRepresentative()[0]); + EXPECT_EQ(EColumnType::kSplitIndex64, writer->GetModel().GetField("PBool").GetColumnRepresentatives()[0][0]); } if constexpr (std::is_same_v) { - EXPECT_EQ(EColumnType::kBit, writer->GetModel().GetField("PBool").GetColumnRepresentative()[0]); + EXPECT_EQ(EColumnType::kBit, writer->GetModel().GetField("PBool").GetColumnRepresentatives()[0][0]); } auto pBool = writer->GetModel().GetDefaultEntry().GetPtr>("PBool"); @@ -1542,12 +1542,12 @@ TEST(RNTuple, HalfPrecisionFloat) // TODO: Add std::float16 tests once available (from C++23) auto f1Fld = RFieldBase::Create("f1", "float").Unwrap(); dynamic_cast *>(f1Fld.get())->SetHalfPrecision(); - EXPECT_EQ(EColumnType::kReal16, f1Fld->GetColumnRepresentative()[0]); + EXPECT_EQ(EColumnType::kReal16, f1Fld->GetColumnRepresentatives()[0][0]); EXPECT_EQ("float", f1Fld->GetTypeName()); auto fVecFld = RFieldBase::Create("fVec", "std::vector").Unwrap(); dynamic_cast *>(fVecFld->GetSubFields()[0])->SetHalfPrecision(); - EXPECT_EQ(EColumnType::kReal16, fVecFld->GetSubFields()[0]->GetColumnRepresentative()[0]); + EXPECT_EQ(EColumnType::kReal16, fVecFld->GetSubFields()[0]->GetColumnRepresentatives()[0][0]); auto model = RNTupleModel::Create(); model->AddField(std::move(f1Fld)); @@ -1625,9 +1625,9 @@ TEST(RNTuple, Double32) } auto reader = RNTupleReader::Open("ntuple", fileGuard.GetPath()); - EXPECT_EQ(EColumnType::kReal32, reader->GetModel().GetField("d1").GetColumnRepresentative()[0]); + EXPECT_EQ(EColumnType::kReal32, reader->GetModel().GetField("d1").GetColumnRepresentatives()[0][0]); EXPECT_EQ("", reader->GetModel().GetField("d1").GetTypeAlias()); - EXPECT_EQ(EColumnType::kSplitReal32, reader->GetModel().GetField("d2").GetColumnRepresentative()[0]); + EXPECT_EQ(EColumnType::kSplitReal32, reader->GetModel().GetField("d2").GetColumnRepresentatives()[0][0]); EXPECT_EQ("Double32_t", reader->GetModel().GetField("d2").GetTypeAlias()); auto d1 = reader->GetModel().GetDefaultEntry().GetPtr("d1"); auto d2 = reader->GetModel().GetDefaultEntry().GetPtr("d2"); diff --git a/tree/ntupleutil/v7/test/ntuple_importer.cxx b/tree/ntupleutil/v7/test/ntuple_importer.cxx index 243f04a65b026..d16a5ef52747a 100644 --- a/tree/ntupleutil/v7/test/ntuple_importer.cxx +++ b/tree/ntupleutil/v7/test/ntuple_importer.cxx @@ -254,9 +254,9 @@ TEST(RNTupleImporter, FieldModifier) EXPECT_FLOAT_EQ(2.0, *reader->GetModel().GetDefaultEntry().GetPtr("b")); EXPECT_EQ(RFieldBase::ColumnRepresentation_t{EColumnType::kReal16}, - reader->GetModel().GetField("a").GetColumnRepresentative()); + reader->GetModel().GetField("a").GetColumnRepresentatives()[0]); EXPECT_EQ(RFieldBase::ColumnRepresentation_t{EColumnType::kSplitReal32}, - reader->GetModel().GetField("b").GetColumnRepresentative()); + reader->GetModel().GetField("b").GetColumnRepresentatives()[0]); } TEST(RNTupleImporter, CString) From 49f052d8bb8803d15784add47216810c28dd595b Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Sat, 27 Jul 2024 08:35:53 +0200 Subject: [PATCH 16/37] [ntuple] properly initialize RColumn::fRepresentationIndex --- tree/ntuple/v7/inc/ROOT/RColumn.hxx | 6 +++--- tree/ntuple/v7/inc/ROOT/RField.hxx | 15 ++++++++------- tree/ntuple/v7/src/RColumn.cxx | 5 +++-- tree/ntuple/v7/src/RField.cxx | 8 ++++---- tree/ntuple/v7/test/ntuple_cluster.cxx | 2 +- 5 files changed, 19 insertions(+), 17 deletions(-) diff --git a/tree/ntuple/v7/inc/ROOT/RColumn.hxx b/tree/ntuple/v7/inc/ROOT/RColumn.hxx index 4a8de5e813b4e..c1c9a8824c614 100644 --- a/tree/ntuple/v7/inc/ROOT/RColumn.hxx +++ b/tree/ntuple/v7/inc/ROOT/RColumn.hxx @@ -76,7 +76,7 @@ private: /// Used to pack and unpack pages on writing/reading std::unique_ptr fElement; - RColumn(EColumnType type, std::uint32_t index); + RColumn(EColumnType type, std::uint32_t columnIndex, std::uint16_t representationIndex); /// Used in Append() and AppendV() to handle the case when the main page reached the target size. /// If tail page optimization is enabled, switch the pages; the other page has been flushed when @@ -112,9 +112,9 @@ private: public: template - static std::unique_ptr Create(EColumnType type, std::uint32_t index) + static std::unique_ptr Create(EColumnType type, std::uint32_t columnIdx, std::uint16_t representationIdx) { - auto column = std::unique_ptr(new RColumn(type, index)); + auto column = std::unique_ptr(new RColumn(type, columnIdx, representationIdx)); column->fElement = RColumnElementBase::Generate(type); return column; } diff --git a/tree/ntuple/v7/inc/ROOT/RField.hxx b/tree/ntuple/v7/inc/ROOT/RField.hxx index 9f00a29b45431..4dd0aaf32bd6a 100644 --- a/tree/ntuple/v7/inc/ROOT/RField.hxx +++ b/tree/ntuple/v7/inc/ROOT/RField.hxx @@ -425,13 +425,14 @@ protected: /// Helpers for generating columns. We use the fact that most fields have the same C++/memory types /// for all their column representations. /// Where possible, we call the helpers not from the header to reduce compilation time. - template - void GenerateColumnsImpl(const ColumnRepresentation_t &representation) + template + void GenerateColumnsImpl(const ColumnRepresentation_t &representation, std::uint16_t representationIndex) { assert(ColumnIndexT < representation.size()); - fColumns.emplace_back(Internal::RColumn::Create(representation[ColumnIndexT], ColumnIndexT)); + fColumns.emplace_back( + Internal::RColumn::Create(representation[ColumnIndexT], ColumnIndexT, representationIndex)); if constexpr (sizeof...(TailTs)) - GenerateColumnsImpl(representation); + GenerateColumnsImpl(representation, representationIndex); } /// For writing, use the currently set column representative @@ -439,10 +440,10 @@ protected: void GenerateColumnsImpl() { if (fColumnRepresentatives.empty()) { - GenerateColumnsImpl<0, ColumnCppTs...>(GetColumnRepresentations().GetSerializationDefault()); + GenerateColumnsImpl<0, ColumnCppTs...>(GetColumnRepresentations().GetSerializationDefault(), 0); } else { // TODO(jblomer): generate columns for all of the available representations - GenerateColumnsImpl<0, ColumnCppTs...>(fColumnRepresentatives[0].get()); + GenerateColumnsImpl<0, ColumnCppTs...>(fColumnRepresentatives[0].get(), 0); } } @@ -450,7 +451,7 @@ protected: template void GenerateColumnsImpl(const RNTupleDescriptor &desc) { - GenerateColumnsImpl<0, ColumnCppTs...>(EnsureCompatibleColumnTypes(desc)); + GenerateColumnsImpl<0, ColumnCppTs...>(EnsureCompatibleColumnTypes(desc), 0); } /// Implementations in derived classes should return a static RColumnRepresentations object. The default diff --git a/tree/ntuple/v7/src/RColumn.cxx b/tree/ntuple/v7/src/RColumn.cxx index d270d0049b91c..fd1cff36052e6 100644 --- a/tree/ntuple/v7/src/RColumn.cxx +++ b/tree/ntuple/v7/src/RColumn.cxx @@ -21,8 +21,9 @@ #include -ROOT::Experimental::Internal::RColumn::RColumn(EColumnType type, std::uint32_t index) - : fType(type), fIndex(index), fRepresentationIndex(0 /* TODO(jblomer) */) +ROOT::Experimental::Internal::RColumn::RColumn(EColumnType type, std::uint32_t columnIndex, + std::uint16_t representationIndex) + : fType(type), fIndex(columnIndex), fRepresentationIndex(representationIndex) { // TODO(jblomer): fix for column types with configurable bit length once available const auto [minBits, maxBits] = RColumnElementBase::GetValidBitRange(type); diff --git a/tree/ntuple/v7/src/RField.cxx b/tree/ntuple/v7/src/RField.cxx index 570398cb6582d..26cb5555e0399 100644 --- a/tree/ntuple/v7/src/RField.cxx +++ b/tree/ntuple/v7/src/RField.cxx @@ -3393,9 +3393,9 @@ void ROOT::Experimental::RNullableField::GenerateColumns() // TODO(jblomer): handle all column representatives if (GetColumnRepresentatives()[0][0] == EColumnType::kBit) { fDefaultItemValue = std::make_unique(fSubFields[0]->CreateValue()); - fColumns.emplace_back(Internal::RColumn::Create(EColumnType::kBit, 0)); + fColumns.emplace_back(Internal::RColumn::Create(EColumnType::kBit, 0, 0)); } else { - fColumns.emplace_back(Internal::RColumn::Create(GetColumnRepresentatives()[0][0], 0)); + fColumns.emplace_back(Internal::RColumn::Create(GetColumnRepresentatives()[0][0], 0, 0)); } } @@ -3403,9 +3403,9 @@ void ROOT::Experimental::RNullableField::GenerateColumns(const RNTupleDescriptor { auto onDiskTypes = EnsureCompatibleColumnTypes(desc); if (onDiskTypes[0] == EColumnType::kBit) { - fColumns.emplace_back(Internal::RColumn::Create(EColumnType::kBit, 0)); + fColumns.emplace_back(Internal::RColumn::Create(EColumnType::kBit, 0, 0)); } else { - fColumns.emplace_back(Internal::RColumn::Create(onDiskTypes[0], 0)); + fColumns.emplace_back(Internal::RColumn::Create(onDiskTypes[0], 0, 0)); } } diff --git a/tree/ntuple/v7/test/ntuple_cluster.cxx b/tree/ntuple/v7/test/ntuple_cluster.cxx index 9a6acbaa2ca23..09122c43ff8e1 100644 --- a/tree/ntuple/v7/test/ntuple_cluster.cxx +++ b/tree/ntuple/v7/test/ntuple_cluster.cxx @@ -347,7 +347,7 @@ TEST(PageStorageFile, LoadClusters) EXPECT_EQ(0U, cluster->GetId()); EXPECT_EQ(0U, cluster->GetNOnDiskPages()); - auto column = ROOT::Experimental::Internal::RColumn::Create(ROOT::Experimental::EColumnType::kReal32, 0); + auto column = ROOT::Experimental::Internal::RColumn::Create(ROOT::Experimental::EColumnType::kReal32, 0, 0); column->ConnectPageSource(ptId, source); clusterKeys[0].fClusterId = 1; clusterKeys[0].fPhysicalColumnSet.insert(colId); From 57c59f22b4f36ad13fa04ff1b1194e6f3d7db467 Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Sat, 27 Jul 2024 13:52:00 +0200 Subject: [PATCH 17/37] [ntuple] create write column for all representations --- tree/ntuple/v7/inc/ROOT/RField.hxx | 15 ++++++++++----- tree/ntuple/v7/src/RField.cxx | 22 +++++++++++++++------- tree/ntuple/v7/src/RPageStorage.cxx | 4 ++++ 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/tree/ntuple/v7/inc/ROOT/RField.hxx b/tree/ntuple/v7/inc/ROOT/RField.hxx index 4dd0aaf32bd6a..487fb7045a518 100644 --- a/tree/ntuple/v7/inc/ROOT/RField.hxx +++ b/tree/ntuple/v7/inc/ROOT/RField.hxx @@ -400,11 +400,12 @@ protected: std::vector> fSubFields; /// Sub fields point to their mother field RFieldBase* fParent; - /// Points into fColumns. All fields that have columns have a distinct main column. For simple fields - /// (float, int, ...), the principal column corresponds to the field type. For collection fields expect std::array, - /// the main column is the offset field. Class fields have no column of their own. + /// Points to the first element of fColumns. All fields that have columns have a distinct main column. + /// For simple fields (float, int, ...), the principal column corresponds to the field type. For collection fields + /// except std::array, the main column is the offset field. Class fields have no column of their own. Internal::RColumn *fPrincipalColumn; /// The columns are connected either to a sink or to a source (not to both); they are owned by the field. + /// Contains all columns of all representations in order of representation and column index. std::vector> fColumns; /// Properties of the type that allow for optimizations of collections of that type int fTraits = 0; @@ -440,10 +441,14 @@ protected: void GenerateColumnsImpl() { if (fColumnRepresentatives.empty()) { + fColumns.reserve(sizeof...(ColumnCppTs)); GenerateColumnsImpl<0, ColumnCppTs...>(GetColumnRepresentations().GetSerializationDefault(), 0); } else { - // TODO(jblomer): generate columns for all of the available representations - GenerateColumnsImpl<0, ColumnCppTs...>(fColumnRepresentatives[0].get(), 0); + const auto N = fColumnRepresentatives.size(); + fColumns.reserve(N * sizeof...(ColumnCppTs)); + for (unsigned i = 0; i < N; ++i) { + GenerateColumnsImpl<0, ColumnCppTs...>(fColumnRepresentatives[i].get(), i); + } } } diff --git a/tree/ntuple/v7/src/RField.cxx b/tree/ntuple/v7/src/RField.cxx index 26cb5555e0399..4bec31644a151 100644 --- a/tree/ntuple/v7/src/RField.cxx +++ b/tree/ntuple/v7/src/RField.cxx @@ -1114,7 +1114,10 @@ void ROOT::Experimental::RFieldBase::ConnectPageSink(Internal::RPageSink &pageSi if (!fColumns.empty()) fPrincipalColumn = fColumns[0].get(); for (auto &column : fColumns) { - auto firstElementIndex = (column.get() == fPrincipalColumn) ? EntryToColumnElementIndex(firstEntry) : 0; + // Only the first column of every representation can be a deferred column. In all column representations, + // larger column indexes are data columns of collections (string, unsplit) and thus + // they have no elements on late model extension + auto firstElementIndex = (column->GetIndex() == 0) ? EntryToColumnElementIndex(firstEntry) : 0; column->ConnectPageSink(fOnDiskId, pageSink, firstElementIndex); } @@ -3390,12 +3393,17 @@ ROOT::Experimental::RNullableField::GetColumnRepresentations() const void ROOT::Experimental::RNullableField::GenerateColumns() { - // TODO(jblomer): handle all column representatives - if (GetColumnRepresentatives()[0][0] == EColumnType::kBit) { - fDefaultItemValue = std::make_unique(fSubFields[0]->CreateValue()); - fColumns.emplace_back(Internal::RColumn::Create(EColumnType::kBit, 0, 0)); - } else { - fColumns.emplace_back(Internal::RColumn::Create(GetColumnRepresentatives()[0][0], 0, 0)); + const auto r = GetColumnRepresentatives(); + const auto N = r.size(); + fColumns.reserve(N); + for (std::uint16_t i = 0; i < N; ++i) { + if (r[i][0] == EColumnType::kBit) { + if (!fDefaultItemValue) + fDefaultItemValue = std::make_unique(fSubFields[0]->CreateValue()); + fColumns.emplace_back(Internal::RColumn::Create(EColumnType::kBit, 0, i)); + } else { + fColumns.emplace_back(Internal::RColumn::Create(r[i][0], 0, i)); + } } } diff --git a/tree/ntuple/v7/src/RPageStorage.cxx b/tree/ntuple/v7/src/RPageStorage.cxx index 9bf6527ff4fd0..960db3b2f7f2b 100644 --- a/tree/ntuple/v7/src/RPageStorage.cxx +++ b/tree/ntuple/v7/src/RPageStorage.cxx @@ -616,6 +616,10 @@ ROOT::Experimental::Internal::RPagePersistentSink::AddColumn(DescriptorId_t fiel .Index(column.GetIndex()) .RepresentationIndex(column.GetRepresentationIndex()) .FirstElementIndex(column.GetFirstElementIndex()); + // For late model extension, we assume that the primary column representation is the active one for the + // deferred range. All other representations are suppressed. + if (column.GetFirstElementIndex() > 0 && column.GetRepresentationIndex() > 0) + columnBuilder.SetSuppressedDeferred(); fDescriptorBuilder.AddColumn(columnBuilder.MakeDescriptor().Unwrap()); return ColumnHandle_t{columnId, &column}; } From 3721a3e6c96fbdb2caf78ccf8f1bb82fb38bf7be Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Sat, 27 Jul 2024 20:15:53 +0200 Subject: [PATCH 18/37] [ntuple] add RPageSink::CommitSuppressedColumn() --- tree/ntuple/v7/inc/ROOT/RNTupleDescriptor.hxx | 5 +++ tree/ntuple/v7/inc/ROOT/RPageNullSink.hxx | 1 + tree/ntuple/v7/inc/ROOT/RPageSinkBuf.hxx | 3 ++ tree/ntuple/v7/inc/ROOT/RPageStorage.hxx | 4 ++ tree/ntuple/v7/src/RNTupleParallelWriter.cxx | 1 + tree/ntuple/v7/src/RPageSinkBuf.cxx | 9 ++++ tree/ntuple/v7/src/RPageStorage.cxx | 43 +++++++++++++++---- tree/ntuple/v7/test/ntuple_endian.cxx | 1 + tree/ntuple/v7/test/ntuple_storage.cxx | 1 + 9 files changed, 59 insertions(+), 9 deletions(-) diff --git a/tree/ntuple/v7/inc/ROOT/RNTupleDescriptor.hxx b/tree/ntuple/v7/inc/ROOT/RNTupleDescriptor.hxx index 0a1e8648c22dc..0a1bf9b2d7659 100644 --- a/tree/ntuple/v7/inc/ROOT/RNTupleDescriptor.hxx +++ b/tree/ntuple/v7/inc/ROOT/RNTupleDescriptor.hxx @@ -1255,6 +1255,11 @@ public: /// should happen before calling this function. RClusterDescriptorBuilder &AddExtendedColumnRanges(const RNTupleDescriptor &desc); + const RClusterDescriptor::RColumnRange &GetColumnRange(DescriptorId_t physicalId) + { + return fCluster.GetColumnRange(physicalId); + } + /// Move out the full cluster descriptor including page locations RResult MoveDescriptor(); }; diff --git a/tree/ntuple/v7/inc/ROOT/RPageNullSink.hxx b/tree/ntuple/v7/inc/ROOT/RPageNullSink.hxx index 77c1b253878fa..fc20642cebaec 100644 --- a/tree/ntuple/v7/inc/ROOT/RPageNullSink.hxx +++ b/tree/ntuple/v7/inc/ROOT/RPageNullSink.hxx @@ -73,6 +73,7 @@ public: } void UpdateExtraTypeInfo(const RExtraTypeInfoDescriptor &) final {} + void CommitSuppressedColumn(ColumnHandle_t) final {} void CommitPage(ColumnHandle_t, const RPage &page) final { fNBytesCurrentCluster += page.GetNBytes(); } void CommitSealedPage(DescriptorId_t, const RSealedPage &page) final { diff --git a/tree/ntuple/v7/inc/ROOT/RPageSinkBuf.hxx b/tree/ntuple/v7/inc/ROOT/RPageSinkBuf.hxx index 6f3235d055ce3..0b2688fa73c8d 100644 --- a/tree/ntuple/v7/inc/ROOT/RPageSinkBuf.hxx +++ b/tree/ntuple/v7/inc/ROOT/RPageSinkBuf.hxx @@ -126,6 +126,8 @@ private: std::unique_ptr fInnerModel; /// Vector of buffered column pages. Indexed by column id. std::vector fBufferedColumns; + /// Columns committed as suppressed are stored and passed to the inner sink at cluster commit + std::vector fSuppressedColumns; DescriptorId_t fNFields = 0; DescriptorId_t fNColumns = 0; @@ -147,6 +149,7 @@ public: void UpdateSchema(const RNTupleModelChangeset &changeset, NTupleSize_t firstEntry) final; void UpdateExtraTypeInfo(const RExtraTypeInfoDescriptor &extraTypeInfo) final; + void CommitSuppressedColumn(ColumnHandle_t columnHandle) final; void CommitPage(ColumnHandle_t columnHandle, const RPage &page) final; void CommitSealedPage(DescriptorId_t physicalColumnId, const RSealedPage &sealedPage) final; void CommitSealedPageV(std::span ranges) final; diff --git a/tree/ntuple/v7/inc/ROOT/RPageStorage.hxx b/tree/ntuple/v7/inc/ROOT/RPageStorage.hxx index c97b4b9ebae6e..458a9d75d13ad 100644 --- a/tree/ntuple/v7/inc/ROOT/RPageStorage.hxx +++ b/tree/ntuple/v7/inc/ROOT/RPageStorage.hxx @@ -300,6 +300,9 @@ public: /// registered by specific fields (e.g., unsplit field) and during merging. virtual void UpdateExtraTypeInfo(const RExtraTypeInfoDescriptor &extraTypeInfo) = 0; + /// Commits a suppressed column for the current cluster. Can be called anytime before CommitCluster(). + /// For any given column and cluster, there must be no calls to both CommitSuppressedColumn() and page commits. + virtual void CommitSuppressedColumn(ColumnHandle_t columnHandle) = 0; /// Write a page to the storage. The column must have been added before. virtual void CommitPage(ColumnHandle_t columnHandle, const RPage &page) = 0; /// Write a preprocessed page to storage. The column must have been added before. @@ -452,6 +455,7 @@ public: /// Initialize sink based on an existing descriptor and fill into the descriptor builder. void InitFromDescriptor(const RNTupleDescriptor &descriptor); + void CommitSuppressedColumn(ColumnHandle_t columnHandle) final; void CommitPage(ColumnHandle_t columnHandle, const RPage &page) final; void CommitSealedPage(DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage) final; void CommitSealedPageV(std::span ranges) final; diff --git a/tree/ntuple/v7/src/RNTupleParallelWriter.cxx b/tree/ntuple/v7/src/RNTupleParallelWriter.cxx index 2e81e91488f43..153f9bcb0f4c1 100644 --- a/tree/ntuple/v7/src/RNTupleParallelWriter.cxx +++ b/tree/ntuple/v7/src/RNTupleParallelWriter.cxx @@ -82,6 +82,7 @@ class RPageSynchronizingSink : public RPageSink { throw RException(R__FAIL("UpdateExtraTypeInfo not supported via RPageSynchronizingSink")); } + void CommitSuppressedColumn(ColumnHandle_t handle) final { fInnerSink->CommitSuppressedColumn(handle); } void CommitPage(ColumnHandle_t, const RPage &) final { throw RException(R__FAIL("should never commit single pages via RPageSynchronizingSink")); diff --git a/tree/ntuple/v7/src/RPageSinkBuf.cxx b/tree/ntuple/v7/src/RPageSinkBuf.cxx index 3b6f72d385572..dc6e8d318d60b 100644 --- a/tree/ntuple/v7/src/RPageSinkBuf.cxx +++ b/tree/ntuple/v7/src/RPageSinkBuf.cxx @@ -136,6 +136,11 @@ void ROOT::Experimental::Internal::RPageSinkBuf::UpdateExtraTypeInfo(const RExtr fInnerSink->UpdateExtraTypeInfo(extraTypeInfo); } +void ROOT::Experimental::Internal::RPageSinkBuf::CommitSuppressedColumn(ColumnHandle_t columnHandle) +{ + fSuppressedColumns.emplace_back(columnHandle); +} + void ROOT::Experimental::Internal::RPageSinkBuf::CommitPage(ColumnHandle_t columnHandle, const RPage &page) { auto colId = columnHandle.fPhysicalId; @@ -215,6 +220,10 @@ std::uint64_t ROOT::Experimental::Internal::RPageSinkBuf::CommitCluster(ROOT::Ex Detail::RNTuplePlainTimer timer(fCounters->fTimeWallCriticalSection, fCounters->fTimeCpuCriticalSection); fInnerSink->CommitSealedPageV(toCommit); + for (auto handle : fSuppressedColumns) + fInnerSink->CommitSuppressedColumn(handle); + fSuppressedColumns.clear(); + nbytes = fInnerSink->CommitCluster(nNewEntries); } diff --git a/tree/ntuple/v7/src/RPageStorage.cxx b/tree/ntuple/v7/src/RPageStorage.cxx index 960db3b2f7f2b..12d7ffa28ea68 100644 --- a/tree/ntuple/v7/src/RPageStorage.cxx +++ b/tree/ntuple/v7/src/RPageStorage.cxx @@ -32,11 +32,11 @@ #include #include -#include -#include +#include #include #include -#include +#include +#include ROOT::Experimental::Internal::RPageStorage::RPageStorage(std::string_view name) : fMetrics(""), fNTupleName(name) {} @@ -760,6 +760,11 @@ void ROOT::Experimental::Internal::RPagePersistentSink::InitFromDescriptor(const } } +void ROOT::Experimental::Internal::RPagePersistentSink::CommitSuppressedColumn(ColumnHandle_t columnHandle) +{ + fOpenColumnRanges.at(columnHandle.fPhysicalId).fIsSuppressed = true; +} + void ROOT::Experimental::Internal::RPagePersistentSink::CommitPage(ColumnHandle_t columnHandle, const RPage &page) { fOpenColumnRanges.at(columnHandle.fPhysicalId).fNElements += page.GetNElements(); @@ -878,14 +883,34 @@ ROOT::Experimental::Internal::RPagePersistentSink::CommitCluster(ROOT::Experimen .FirstEntryIndex(fPrevClusterNEntries) .NEntries(nNewEntries); for (unsigned int i = 0; i < fOpenColumnRanges.size(); ++i) { - RClusterDescriptor::RPageRange fullRange; - fullRange.fPhysicalColumnId = i; - std::swap(fullRange, fOpenPageRanges[i]); - clusterBuilder.CommitColumnRange(i, fOpenColumnRanges[i].fFirstElementIndex, - fOpenColumnRanges[i].fCompressionSettings, fullRange); - fOpenColumnRanges[i].fFirstElementIndex += fOpenColumnRanges[i].fNElements; + if (fOpenColumnRanges[i].fIsSuppressed) { + assert(fOpenPageRanges[i].fPageInfos.empty()); + clusterBuilder.MarkSuppressedColumnRange(i); + } else { + RClusterDescriptor::RPageRange fullRange; + fullRange.fPhysicalColumnId = i; + std::swap(fullRange, fOpenPageRanges[i]); + clusterBuilder.CommitColumnRange(i, fOpenColumnRanges[i].fFirstElementIndex, + fOpenColumnRanges[i].fCompressionSettings, fullRange); + fOpenColumnRanges[i].fFirstElementIndex += fOpenColumnRanges[i].fNElements; + fOpenColumnRanges[i].fNElements = 0; + } + } + + clusterBuilder.CommitSuppressedColumnRanges(fDescriptorBuilder.GetDescriptor()).ThrowOnError(); + for (unsigned int i = 0; i < fOpenColumnRanges.size(); ++i) { + if (!fOpenColumnRanges[i].fIsSuppressed) + continue; + // We reset suppressed columns to the state they would have if they were active (not suppressed). + // In particular, we need to reset the first element index to the first element of the next (upcoming) cluster. + // This information has been determined for the committed cluster descriptor through + // CommitSuppressedColumnRanges(), so we can use the information from the descriptor. + const auto &columnRangeFromDesc = clusterBuilder.GetColumnRange(i); + fOpenColumnRanges[i].fFirstElementIndex = columnRangeFromDesc.fFirstElementIndex + columnRangeFromDesc.fNElements; fOpenColumnRanges[i].fNElements = 0; + fOpenColumnRanges[i].fIsSuppressed = false; } + fDescriptorBuilder.AddCluster(clusterBuilder.MoveDescriptor().Unwrap()); fPrevClusterNEntries += nNewEntries; return nbytes; diff --git a/tree/ntuple/v7/test/ntuple_endian.cxx b/tree/ntuple/v7/test/ntuple_endian.cxx index 4f802b93366e0..29590d860562b 100644 --- a/tree/ntuple/v7/test/ntuple_endian.cxx +++ b/tree/ntuple/v7/test/ntuple_endian.cxx @@ -51,6 +51,7 @@ class RPageSinkMock : public RPageSink { void InitImpl(RNTupleModel &) final {} void UpdateSchema(const ROOT::Experimental::Internal::RNTupleModelChangeset &, NTupleSize_t) final {} void UpdateExtraTypeInfo(const ROOT::Experimental::RExtraTypeInfoDescriptor &) final {} + void CommitSuppressedColumn(ColumnHandle_t) final {} void CommitSealedPage(ROOT::Experimental::DescriptorId_t, const RPageStorage::RSealedPage &) final {} void CommitSealedPageV(std::span) final {} std::uint64_t CommitCluster(NTupleSize_t) final { return 0; } diff --git a/tree/ntuple/v7/test/ntuple_storage.cxx b/tree/ntuple/v7/test/ntuple_storage.cxx index 845d14edd357c..40837a412385f 100644 --- a/tree/ntuple/v7/test/ntuple_storage.cxx +++ b/tree/ntuple/v7/test/ntuple_storage.cxx @@ -41,6 +41,7 @@ class RPageSinkMock : public RPageSink { void InitImpl(RNTupleModel &) final {} void UpdateSchema(const ROOT::Experimental::Internal::RNTupleModelChangeset &, NTupleSize_t) final {} void UpdateExtraTypeInfo(const ROOT::Experimental::RExtraTypeInfoDescriptor &) final {} + void CommitSuppressedColumn(ColumnHandle_t) final {} void CommitPage(ColumnHandle_t /*columnHandle*/, const RPage & /*page*/) final { fCounters.fNCommitPage++; } void CommitSealedPage(ROOT::Experimental::DescriptorId_t, const RPageStorage::RSealedPage &) final { From ca46f65f4c7c496550d9768c1c662d37956a636e Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Sat, 27 Jul 2024 22:04:10 +0200 Subject: [PATCH 19/37] [ntuple] RFieldBase::Flush(): commit suppressed columns --- tree/ntuple/v7/inc/ROOT/RColumn.hxx | 1 + tree/ntuple/v7/src/RColumn.cxx | 5 +++++ tree/ntuple/v7/src/RField.cxx | 11 +++++++++-- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/tree/ntuple/v7/inc/ROOT/RColumn.hxx b/tree/ntuple/v7/inc/ROOT/RColumn.hxx index c1c9a8824c614..554783765822c 100644 --- a/tree/ntuple/v7/inc/ROOT/RColumn.hxx +++ b/tree/ntuple/v7/inc/ROOT/RColumn.hxx @@ -330,6 +330,7 @@ public: } void Flush(); + void CommitSuppressed(); void MapPage(const NTupleSize_t index); void MapPage(RClusterIndex clusterIndex); NTupleSize_t GetNElements() const { return fNElements; } diff --git a/tree/ntuple/v7/src/RColumn.cxx b/tree/ntuple/v7/src/RColumn.cxx index fd1cff36052e6..9d2ee12a8e7ad 100644 --- a/tree/ntuple/v7/src/RColumn.cxx +++ b/tree/ntuple/v7/src/RColumn.cxx @@ -99,6 +99,11 @@ void ROOT::Experimental::Internal::RColumn::Flush() fWritePage[fWritePageIdx].Reset(fNElements); } +void ROOT::Experimental::Internal::RColumn::CommitSuppressed() +{ + fPageSink->CommitSuppressedColumn(fHandleSink); +} + void ROOT::Experimental::Internal::RColumn::MapPage(const NTupleSize_t index) { fPageSource->ReleasePage(fReadPage); diff --git a/tree/ntuple/v7/src/RField.cxx b/tree/ntuple/v7/src/RField.cxx index 4bec31644a151..d7f8c7e1e454f 100644 --- a/tree/ntuple/v7/src/RField.cxx +++ b/tree/ntuple/v7/src/RField.cxx @@ -978,8 +978,15 @@ std::vector ROOT::Experimental::RFieldBa void ROOT::Experimental::RFieldBase::CommitCluster() { - for (auto& column : fColumns) { - column->Flush(); + if (!fColumns.empty()) { + const auto activeRepresentationIndex = fColumns[0]->GetRepresentationIndex(); + for (auto &column : fColumns) { + if (column->GetRepresentationIndex() == activeRepresentationIndex) { + column->Flush(); + } else { + column->CommitSuppressed(); + } + } } CommitClusterImpl(); } From b023fe01a7e04d0a2e731292a8a987d3ab5da381 Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Sun, 28 Jul 2024 15:16:23 +0200 Subject: [PATCH 20/37] [ntuple] RFieldBase: attach read columns from all representations --- tree/ntuple/v7/inc/ROOT/RField.hxx | 21 ++++++++-- tree/ntuple/v7/src/RField.cxx | 60 ++++++++++++++++++---------- tree/ntuple/v7/test/ntuple_types.cxx | 2 +- 3 files changed, 57 insertions(+), 26 deletions(-) diff --git a/tree/ntuple/v7/inc/ROOT/RField.hxx b/tree/ntuple/v7/inc/ROOT/RField.hxx index 487fb7045a518..217f56bae8be9 100644 --- a/tree/ntuple/v7/inc/ROOT/RField.hxx +++ b/tree/ntuple/v7/inc/ROOT/RField.hxx @@ -456,7 +456,15 @@ protected: template void GenerateColumnsImpl(const RNTupleDescriptor &desc) { - GenerateColumnsImpl<0, ColumnCppTs...>(EnsureCompatibleColumnTypes(desc), 0); + std::uint16_t representationIndex = 0; + do { + const auto &onDiskTypes = EnsureCompatibleColumnTypes(desc, representationIndex); + if (onDiskTypes.empty()) + break; + GenerateColumnsImpl<0, ColumnCppTs...>(onDiskTypes, representationIndex); + fColumnRepresentatives.emplace_back(onDiskTypes); + representationIndex++; + } while (true); } /// Implementations in derived classes should return a static RColumnRepresentations object. The default @@ -469,9 +477,14 @@ protected: /// The default implementation does not attach any columns to the field. The method should check, using the page /// source and fOnDiskId, if the column types match and throw if they don't. virtual void GenerateColumns(const RNTupleDescriptor & /*desc*/) {} - /// Returns the on-disk column types found in the provided descriptor for fOnDiskId. Throws an exception if the types - /// don't match any of the deserialization types from GetColumnRepresentations(). - const ColumnRepresentation_t &EnsureCompatibleColumnTypes(const RNTupleDescriptor &desc) const; + /// Returns the on-disk column types found in the provided descriptor for fOnDiskId and the given + /// representation index. If there are no columns for the given representation index, return an empty + /// ColumnRepresentation_t list. Otherwise, the returned reference points into the static array returned by + /// GetColumnRepresentations(). + /// Throws an exception if the types on disk don't match any of the deserialization types from + /// GetColumnRepresentations(). + const ColumnRepresentation_t & + EnsureCompatibleColumnTypes(const RNTupleDescriptor &desc, std::uint16_t representationIndex) const; /// When connecting a field to a page sink, the field's default column representation is subject /// to adjustment according to the write options. E.g., if compression is turned off, encoded columns /// are changed to their unencoded counterparts. diff --git a/tree/ntuple/v7/src/RField.cxx b/tree/ntuple/v7/src/RField.cxx index d7f8c7e1e454f..df95dbc355788 100644 --- a/tree/ntuple/v7/src/RField.cxx +++ b/tree/ntuple/v7/src/RField.cxx @@ -1036,15 +1036,26 @@ void ROOT::Experimental::RFieldBase::SetColumnRepresentatives( } const ROOT::Experimental::RFieldBase::ColumnRepresentation_t & -ROOT::Experimental::RFieldBase::EnsureCompatibleColumnTypes(const RNTupleDescriptor &desc) const +ROOT::Experimental::RFieldBase::EnsureCompatibleColumnTypes(const RNTupleDescriptor &desc, + std::uint16_t representationIndex) const { + static const ColumnRepresentation_t kEmpty; + if (fOnDiskId == kInvalidDescriptorId) - throw RException(R__FAIL("No on-disk column information for field `" + GetQualifiedFieldName() + "`")); + throw RException(R__FAIL("No on-disk field information for `" + GetQualifiedFieldName() + "`")); ColumnRepresentation_t onDiskTypes; for (const auto &c : desc.GetColumnIterable(fOnDiskId)) { - onDiskTypes.emplace_back(c.GetType()); + if (c.GetRepresentationIndex() == representationIndex) + onDiskTypes.emplace_back(c.GetType()); + } + if (onDiskTypes.empty()) { + if (representationIndex == 0) { + throw RException(R__FAIL("No on-disk column information for field `" + GetQualifiedFieldName() + "`")); + } + return kEmpty; } + for (const auto &t : GetColumnRepresentations().GetDeserializationTypes()) { if (t == onDiskTypes) return t; @@ -1054,10 +1065,10 @@ ROOT::Experimental::RFieldBase::EnsureCompatibleColumnTypes(const RNTupleDescrip for (const auto &t : onDiskTypes) { if (!columnTypeNames.empty()) columnTypeNames += ", "; - columnTypeNames += Internal::RColumnElementBase::GetTypeName(t); + columnTypeNames += std::string("`") + Internal::RColumnElementBase::GetTypeName(t) + "`"; } - throw RException(R__FAIL("On-disk column types `" + columnTypeNames + "` for field `" + GetQualifiedFieldName() + - "` cannot be matched.")); + throw RException(R__FAIL("On-disk column types {" + columnTypeNames + "} for field `" + GetQualifiedFieldName() + + "` cannot be matched (representation index: " + std::to_string(representationIndex) + ")")); } size_t ROOT::Experimental::RFieldBase::AddReadCallback(ReadCallback_t func) @@ -1159,15 +1170,13 @@ void ROOT::Experimental::RFieldBase::ConnectPageSource(Internal::RPageSource &pa const auto descriptorGuard = pageSource.GetSharedDescriptorGuard(); const RNTupleDescriptor &desc = descriptorGuard.GetRef(); GenerateColumns(desc); - ColumnRepresentation_t onDiskColumnTypes; - onDiskColumnTypes.reserve(fColumns.size()); - for (const auto &c : fColumns) { - onDiskColumnTypes.emplace_back(c->GetType()); - } - for (const auto &t : GetColumnRepresentations().GetDeserializationTypes()) { - if (t == onDiskColumnTypes) { - fColumnRepresentatives = {t}; - break; + if (fColumnRepresentatives.empty()) { + // If we didn't get columns from the descriptor, ensure that we actually expect a field without columns + for (const auto &t : GetColumnRepresentations().GetDeserializationTypes()) { + if (t.empty()) { + fColumnRepresentatives = {t}; + break; + } } } R__ASSERT(!fColumnRepresentatives.empty()); @@ -3416,12 +3425,21 @@ void ROOT::Experimental::RNullableField::GenerateColumns() void ROOT::Experimental::RNullableField::GenerateColumns(const RNTupleDescriptor &desc) { - auto onDiskTypes = EnsureCompatibleColumnTypes(desc); - if (onDiskTypes[0] == EColumnType::kBit) { - fColumns.emplace_back(Internal::RColumn::Create(EColumnType::kBit, 0, 0)); - } else { - fColumns.emplace_back(Internal::RColumn::Create(onDiskTypes[0], 0, 0)); - } + std::uint16_t representationIndex = 0; + do { + const auto &onDiskTypes = EnsureCompatibleColumnTypes(desc, representationIndex); + if (onDiskTypes.empty()) + break; + + if (onDiskTypes[0] == EColumnType::kBit) { + fColumns.emplace_back(Internal::RColumn::Create(EColumnType::kBit, 0, representationIndex)); + } else { + fColumns.emplace_back(Internal::RColumn::Create(onDiskTypes[0], 0, representationIndex)); + } + fColumnRepresentatives.emplace_back(onDiskTypes); + + representationIndex++; + } while (true); } std::size_t ROOT::Experimental::RNullableField::AppendNull() diff --git a/tree/ntuple/v7/test/ntuple_types.cxx b/tree/ntuple/v7/test/ntuple_types.cxx index 933a052acf963..c6d4daef89e62 100644 --- a/tree/ntuple/v7/test/ntuple_types.cxx +++ b/tree/ntuple/v7/test/ntuple_types.cxx @@ -1740,7 +1740,7 @@ TEST(RNTuple, TClass) auto viewKlass = ntuple->GetView("klass"); FAIL() << "GetView should throw"; } catch (const RException& err) { - EXPECT_THAT(err.what(), testing::HasSubstr("No on-disk column information for field `klass.:_0.a`")); + EXPECT_THAT(err.what(), testing::HasSubstr("No on-disk field information for `klass.:_0.a`")); } } } From e779bff17b6fe3af108a626075976c1ec34ff024 Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Mon, 29 Jul 2024 15:26:35 +0200 Subject: [PATCH 21/37] [ntuple] signal suppressed columns in PopulatePage() --- tree/ntuple/v7/inc/ROOT/RPage.hxx | 1 + tree/ntuple/v7/inc/ROOT/RPageStorage.hxx | 3 ++- tree/ntuple/v7/src/RPageSourceFriends.cxx | 6 ++++++ tree/ntuple/v7/src/RPageStorage.cxx | 12 ++++++++++-- 4 files changed, 19 insertions(+), 3 deletions(-) diff --git a/tree/ntuple/v7/inc/ROOT/RPage.hxx b/tree/ntuple/v7/inc/ROOT/RPage.hxx index d9679182fc0ae..1f97c4fb1fd2e 100644 --- a/tree/ntuple/v7/inc/ROOT/RPage.hxx +++ b/tree/ntuple/v7/inc/ROOT/RPage.hxx @@ -139,6 +139,7 @@ public: /// Return a pointer to the page zero buffer used if there is no on-disk data for a particular deferred column static const void *GetPageZeroBuffer(); + bool IsValid() const { return fColumnId != kInvalidColumnId; } bool IsNull() const { return fBuffer == nullptr; } bool IsPageZero() const { return fBuffer == GetPageZeroBuffer(); } bool IsEmpty() const { return fNElements == 0; } diff --git a/tree/ntuple/v7/inc/ROOT/RPageStorage.hxx b/tree/ntuple/v7/inc/ROOT/RPageStorage.hxx index 458a9d75d13ad..99557557ae0b6 100644 --- a/tree/ntuple/v7/inc/ROOT/RPageStorage.hxx +++ b/tree/ntuple/v7/inc/ROOT/RPageStorage.hxx @@ -667,9 +667,10 @@ public: REntryRange GetEntryRange() const { return fEntryRange; } /// Allocates and fills a page that contains the index-th element. The default implementation searches - /// the page and calls PopulatePageImpl(). + /// the page and calls PopulatePageImpl(). Returns a default-constructed RPage for suppressed columns. virtual RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex); /// Another version of `PopulatePage` that allows to specify cluster-relative indexes. + /// Returns a default-constructed RPage for suppressed columns. virtual RPage PopulatePage(ColumnHandle_t columnHandle, RClusterIndex clusterIndex); /// Read the packed and compressed bytes of a page into the memory buffer provided by `sealedPage`. The sealed page diff --git a/tree/ntuple/v7/src/RPageSourceFriends.cxx b/tree/ntuple/v7/src/RPageSourceFriends.cxx index 7dc5df788c269..6ba3363f1924e 100644 --- a/tree/ntuple/v7/src/RPageSourceFriends.cxx +++ b/tree/ntuple/v7/src/RPageSourceFriends.cxx @@ -160,6 +160,9 @@ ROOT::Experimental::Internal::RPageSourceFriends::PopulatePage(ColumnHandle_t co columnHandle.fPhysicalId = originColumnId.fId; auto page = fSources[originColumnId.fSourceIdx]->PopulatePage(columnHandle, globalIndex); + // Suppressed column + if (!page.IsValid()) + return RPage(); auto virtualClusterId = fIdBiMap.GetVirtualId({originColumnId.fSourceIdx, page.GetClusterInfo().GetId()}); page.ChangeIds(virtualColumnId, virtualClusterId); @@ -176,6 +179,9 @@ ROOT::Experimental::Internal::RPageSourceFriends::PopulatePage(ColumnHandle_t co columnHandle.fPhysicalId = originColumnId.fId; auto page = fSources[originColumnId.fSourceIdx]->PopulatePage(columnHandle, originClusterIndex); + // Suppressed column + if (!page.IsValid()) + return RPage(); page.ChangeIds(virtualColumnId, clusterIndex.GetClusterId()); return page; diff --git a/tree/ntuple/v7/src/RPageStorage.cxx b/tree/ntuple/v7/src/RPageStorage.cxx index 12d7ffa28ea68..ecd9cea1992ba 100644 --- a/tree/ntuple/v7/src/RPageStorage.cxx +++ b/tree/ntuple/v7/src/RPageStorage.cxx @@ -332,7 +332,11 @@ ROOT::Experimental::Internal::RPageSource::PopulatePage(ColumnHandle_t columnHan throw RException(R__FAIL("entry with index " + std::to_string(globalIndex) + " out of bounds")); const auto &clusterDescriptor = descriptorGuard->GetClusterDescriptor(clusterInfo.fClusterId); - clusterInfo.fColumnOffset = clusterDescriptor.GetColumnRange(columnId).fFirstElementIndex; + const auto &columnRange = clusterDescriptor.GetColumnRange(columnId); + if (columnRange.fIsSuppressed) + return RPage(); + + clusterInfo.fColumnOffset = columnRange.fFirstElementIndex; R__ASSERT(clusterInfo.fColumnOffset <= globalIndex); idxInCluster = globalIndex - clusterInfo.fColumnOffset; clusterInfo.fPageInfo = clusterDescriptor.GetPageRange(columnId).Find(idxInCluster); @@ -358,8 +362,12 @@ ROOT::Experimental::Internal::RPageSource::PopulatePage(ColumnHandle_t columnHan { auto descriptorGuard = GetSharedDescriptorGuard(); const auto &clusterDescriptor = descriptorGuard->GetClusterDescriptor(clusterId); + const auto &columnRange = clusterDescriptor.GetColumnRange(columnId); + if (columnRange.fIsSuppressed) + return RPage(); + clusterInfo.fClusterId = clusterId; - clusterInfo.fColumnOffset = clusterDescriptor.GetColumnRange(columnId).fFirstElementIndex; + clusterInfo.fColumnOffset = columnRange.fFirstElementIndex; clusterInfo.fPageInfo = clusterDescriptor.GetPageRange(columnId).Find(idxInCluster); } From 3a56d19ecceb30d1a9a35595f966264c2cb46b90 Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Mon, 29 Jul 2024 15:40:06 +0200 Subject: [PATCH 22/37] [ntuple] skip suppressed column in cluster preloading --- tree/ntuple/v7/src/RPageStorage.cxx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tree/ntuple/v7/src/RPageStorage.cxx b/tree/ntuple/v7/src/RPageStorage.cxx index ecd9cea1992ba..0a4ab4f12736a 100644 --- a/tree/ntuple/v7/src/RPageStorage.cxx +++ b/tree/ntuple/v7/src/RPageStorage.cxx @@ -299,6 +299,9 @@ void ROOT::Experimental::Internal::RPageSource::PrepareLoadCluster( const auto &clusterDesc = descriptorGuard->GetClusterDescriptor(clusterKey.fClusterId); for (auto physicalColumnId : clusterKey.fPhysicalColumnSet) { + if (clusterDesc.GetColumnRange(physicalColumnId).fIsSuppressed) + continue; + const auto &pageRange = clusterDesc.GetPageRange(physicalColumnId); NTupleSize_t pageNo = 0; for (const auto &pageInfo : pageRange.fPageInfos) { From be5dde7e662a14c62bbdf4b3d4511f07091075ad Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Mon, 29 Jul 2024 22:12:37 +0200 Subject: [PATCH 23/37] [ntuple] introduce column 'teams' to map interleaved columns --- tree/ntuple/v7/inc/ROOT/RColumn.hxx | 10 +++++++++ tree/ntuple/v7/inc/ROOT/RField.hxx | 5 +++++ tree/ntuple/v7/src/RColumn.cxx | 34 ++++++++++++++++++++++++++--- 3 files changed, 46 insertions(+), 3 deletions(-) diff --git a/tree/ntuple/v7/inc/ROOT/RColumn.hxx b/tree/ntuple/v7/inc/ROOT/RColumn.hxx index 554783765822c..e2a123b7c296c 100644 --- a/tree/ntuple/v7/inc/ROOT/RColumn.hxx +++ b/tree/ntuple/v7/inc/ROOT/RColumn.hxx @@ -75,6 +75,13 @@ private: NTupleSize_t fFirstElementIndex = 0; /// Used to pack and unpack pages on writing/reading std::unique_ptr fElement; + /// The column team is a set of columns that serve the same column index for different representation IDs + /// Initially, the team has only one member, the very column it belongs to. Through MergeTeams(), two column + /// can join forces. The team is used to react on suppressed columns: if the current team member has a suppressed + /// column for a MapPage() call, it get the page from the active column in the corresponding cluster. + std::shared_ptr> fTeam; + /// Points into fTeam to the column that successfully returned the last page. + std::size_t fLastGoodTeamIdx = 0; RColumn(EColumnType type, std::uint32_t columnIndex, std::uint16_t representationIndex); @@ -110,6 +117,8 @@ private: fWritePage[otherIdx].Reset(0); } + void TeamUp(const RColumn &other) { fTeam = other.fTeam; } + public: template static std::unique_ptr Create(EColumnType type, std::uint32_t columnIdx, std::uint16_t representationIdx) @@ -333,6 +342,7 @@ public: void CommitSuppressed(); void MapPage(const NTupleSize_t index); void MapPage(RClusterIndex clusterIndex); + void MergeTeams(RColumn &other); NTupleSize_t GetNElements() const { return fNElements; } RColumnElementBase *GetElement() const { return fElement.get(); } EColumnType GetType() const { return fType; } diff --git a/tree/ntuple/v7/inc/ROOT/RField.hxx b/tree/ntuple/v7/inc/ROOT/RField.hxx index 217f56bae8be9..8c5980c9363de 100644 --- a/tree/ntuple/v7/inc/ROOT/RField.hxx +++ b/tree/ntuple/v7/inc/ROOT/RField.hxx @@ -463,6 +463,11 @@ protected: break; GenerateColumnsImpl<0, ColumnCppTs...>(onDiskTypes, representationIndex); fColumnRepresentatives.emplace_back(onDiskTypes); + if (representationIndex > 0) { + for (std::size_t i = 0; i < sizeof...(ColumnCppTs); ++i) { + fColumns[i]->MergeTeams(*fColumns[representationIndex * sizeof...(ColumnCppTs) + i].get()); + } + } representationIndex++; } while (true); } diff --git a/tree/ntuple/v7/src/RColumn.cxx b/tree/ntuple/v7/src/RColumn.cxx index 9d2ee12a8e7ad..846d00ae75543 100644 --- a/tree/ntuple/v7/src/RColumn.cxx +++ b/tree/ntuple/v7/src/RColumn.cxx @@ -20,10 +20,14 @@ #include #include +#include ROOT::Experimental::Internal::RColumn::RColumn(EColumnType type, std::uint32_t columnIndex, std::uint16_t representationIndex) - : fType(type), fIndex(columnIndex), fRepresentationIndex(representationIndex) + : fType(type), + fIndex(columnIndex), + fRepresentationIndex(representationIndex), + fTeam(std::make_shared>(std::vector{this})) { // TODO(jblomer): fix for column types with configurable bit length once available const auto [minBits, maxBits] = RColumnElementBase::GetValidBitRange(type); @@ -110,7 +114,16 @@ void ROOT::Experimental::Internal::RColumn::MapPage(const NTupleSize_t index) // Set fReadPage to an empty page before populating it to prevent double destruction of the previously page in case // the page population fails. fReadPage = RPage(); - fReadPage = fPageSource->PopulatePage(fHandleSource, index); + + const auto nTeam = fTeam->size(); + std::size_t iTeam = 1; + do { + fReadPage = fPageSource->PopulatePage(fTeam->at(fLastGoodTeamIdx)->GetHandleSource(), index); + if (fReadPage.IsValid()) + break; + fLastGoodTeamIdx = (fLastGoodTeamIdx + iTeam++) % nTeam; + } while (iTeam <= nTeam); + R__ASSERT(fReadPage.Contains(index)); } @@ -120,6 +133,21 @@ void ROOT::Experimental::Internal::RColumn::MapPage(RClusterIndex clusterIndex) // Set fReadPage to an empty page before populating it to prevent double destruction of the previously page in case // the page population fails. fReadPage = RPage(); - fReadPage = fPageSource->PopulatePage(fHandleSource, clusterIndex); + + const auto nTeam = fTeam->size(); + std::size_t iTeam = 1; + do { + fReadPage = fPageSource->PopulatePage(fTeam->at(fLastGoodTeamIdx)->GetHandleSource(), clusterIndex); + if (fReadPage.IsValid()) + break; + fLastGoodTeamIdx = (fLastGoodTeamIdx + iTeam++) % nTeam; + } while (iTeam <= nTeam); + R__ASSERT(fReadPage.Contains(clusterIndex)); } + +void ROOT::Experimental::Internal::RColumn::MergeTeams(RColumn &other) +{ + fTeam->emplace_back(&other); + other.TeamUp(*this); +} From 5e772a9bcf5e233dcd79b85be0da0be5a080ec9b Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Tue, 30 Jul 2024 09:54:29 +0200 Subject: [PATCH 24/37] [ntuple] fix printing descriptor with suppressed columns --- tree/ntuple/v7/src/RNTupleDescriptorFmt.cxx | 24 +++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/tree/ntuple/v7/src/RNTupleDescriptorFmt.cxx b/tree/ntuple/v7/src/RNTupleDescriptorFmt.cxx index e95f328e0051f..b4ad5d7213ed0 100644 --- a/tree/ntuple/v7/src/RNTupleDescriptorFmt.cxx +++ b/tree/ntuple/v7/src/RNTupleDescriptorFmt.cxx @@ -41,19 +41,25 @@ struct ColumnInfo { ROOT::Experimental::DescriptorId_t fPhysicalColumnId = 0; ROOT::Experimental::DescriptorId_t fLogicalColumnId = 0; ROOT::Experimental::DescriptorId_t fFieldId = 0; - std::uint64_t fLocalOrder = 0; std::uint64_t fNElements = 0; std::uint64_t fNPages = 0; std::uint64_t fBytesOnStorage = 0; std::uint32_t fElementSize = 0; + std::uint32_t fColumnIndex = 0; + std::uint16_t fRepresentationIndex = 0; ROOT::Experimental::EColumnType fType; std::string fFieldName; std::string fFieldDescription; bool operator<(const ColumnInfo &other) const { - if (fFieldName == other.fFieldName) - return fLocalOrder < other.fLocalOrder; + if (fFieldName == other.fFieldName) { + if (fRepresentationIndex < other.fRepresentationIndex) + return true; + else if (fRepresentationIndex > other.fRepresentationIndex) + return false; + return fColumnIndex < other.fColumnIndex; + } return fFieldName < other.fFieldName; } }; @@ -108,12 +114,16 @@ void ROOT::Experimental::RNTupleDescriptor::PrintInfo(std::ostream &output) cons info.fPhysicalColumnId = column.second.GetPhysicalId(); info.fLogicalColumnId = column.second.GetLogicalId(); info.fFieldId = column.second.GetFieldId(); - info.fLocalOrder = column.second.GetIndex(); + info.fColumnIndex = column.second.GetIndex(); info.fElementSize = elementSize; info.fType = column.second.GetType(); + info.fRepresentationIndex = column.second.GetRepresentationIndex(); for (const auto &cluster : fClusterDescriptors) { auto columnRange = cluster.second.GetColumnRange(column.second.GetPhysicalId()); + if (columnRange.fIsSuppressed) + continue; + info.fNElements += columnRange.fNElements; if (compression == -1) { compression = columnRange.fCompressionSettings; @@ -183,8 +193,10 @@ void ROOT::Experimental::RNTupleDescriptor::PrintInfo(std::ostream &output) cons for (const auto &col : columns) { auto avgPageSize = (col.fNPages == 0) ? 0 : (col.fBytesOnStorage / col.fNPages); auto avgElementsPerPage = (col.fNPages == 0) ? 0 : (col.fNElements / col.fNPages); - std::string nameAndType = std::string(" ") + col.fFieldName + " [#" + std::to_string(col.fLocalOrder) + "]" + - " -- " + Internal::RColumnElementBase::GetTypeName(col.fType); + std::string nameAndType = std::string(" ") + col.fFieldName + " [#" + std::to_string(col.fColumnIndex); + if (col.fRepresentationIndex > 0) + nameAndType += " / R." + std::to_string(col.fRepresentationIndex); + nameAndType += "] -- " + Internal::RColumnElementBase::GetTypeName(col.fType); std::string id = std::string("{id:") + std::to_string(col.fLogicalColumnId) + "}"; if (col.fLogicalColumnId != col.fPhysicalColumnId) id += " --alias--> " + std::to_string(col.fPhysicalColumnId); From b6cd46cf1bebf8f8f5227e79a1a2ffd3be9fb362 Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Tue, 30 Jul 2024 15:06:03 +0200 Subject: [PATCH 25/37] [ntuple] read nullable field with multi column representations --- tree/ntuple/v7/inc/ROOT/RColumn.hxx | 12 ++++++++++-- tree/ntuple/v7/inc/ROOT/RField.hxx | 4 ++++ tree/ntuple/v7/src/RColumn.cxx | 10 +++++----- tree/ntuple/v7/src/RField.cxx | 29 +++++++++++++++++++++++++++++ 4 files changed, 48 insertions(+), 7 deletions(-) diff --git a/tree/ntuple/v7/inc/ROOT/RColumn.hxx b/tree/ntuple/v7/inc/ROOT/RColumn.hxx index e2a123b7c296c..dc5bb471b0ca7 100644 --- a/tree/ntuple/v7/inc/ROOT/RColumn.hxx +++ b/tree/ntuple/v7/inc/ROOT/RColumn.hxx @@ -340,9 +340,17 @@ public: void Flush(); void CommitSuppressed(); - void MapPage(const NTupleSize_t index); - void MapPage(RClusterIndex clusterIndex); + + void MapPage(NTupleSize_t globalIndex) { R__ASSERT(TryMapPage(globalIndex)); } + void MapPage(RClusterIndex clusterIndex) { R__ASSERT(TryMapPage(clusterIndex)); } + bool TryMapPage(NTupleSize_t globalIndex); + bool TryMapPage(RClusterIndex clusterIndex); + + bool ReadPageContains(NTupleSize_t globalIndex) const { return fReadPage.Contains(globalIndex); } + bool ReadPageContains(RClusterIndex clusterIndex) const { return fReadPage.Contains(clusterIndex); } + void MergeTeams(RColumn &other); + NTupleSize_t GetNElements() const { return fNElements; } RColumnElementBase *GetElement() const { return fElement.get(); } EColumnType GetType() const { return fType; } diff --git a/tree/ntuple/v7/inc/ROOT/RField.hxx b/tree/ntuple/v7/inc/ROOT/RField.hxx index 8c5980c9363de..30ec024862ecb 100644 --- a/tree/ntuple/v7/inc/ROOT/RField.hxx +++ b/tree/ntuple/v7/inc/ROOT/RField.hxx @@ -1556,6 +1556,10 @@ protected: std::size_t AppendValue(const void *from); void CommitClusterImpl() final { fNWritten = 0; } + // The default implementation that translates the request into a call to ReadGlobalImpl() cannot be used + // because we don't team up the columns of the different column representations for this field. + void ReadInClusterImpl(RClusterIndex clusterIndex, void *to) final; + /// Given the index of the nullable field, returns the corresponding global index of the subfield or, /// if it is null, returns kInvalidClusterIndex RClusterIndex GetItemIndex(NTupleSize_t globalIndex); diff --git a/tree/ntuple/v7/src/RColumn.cxx b/tree/ntuple/v7/src/RColumn.cxx index 846d00ae75543..b59219ffd19bc 100644 --- a/tree/ntuple/v7/src/RColumn.cxx +++ b/tree/ntuple/v7/src/RColumn.cxx @@ -108,7 +108,7 @@ void ROOT::Experimental::Internal::RColumn::CommitSuppressed() fPageSink->CommitSuppressedColumn(fHandleSink); } -void ROOT::Experimental::Internal::RColumn::MapPage(const NTupleSize_t index) +bool ROOT::Experimental::Internal::RColumn::TryMapPage(NTupleSize_t globalIndex) { fPageSource->ReleasePage(fReadPage); // Set fReadPage to an empty page before populating it to prevent double destruction of the previously page in case @@ -118,16 +118,16 @@ void ROOT::Experimental::Internal::RColumn::MapPage(const NTupleSize_t index) const auto nTeam = fTeam->size(); std::size_t iTeam = 1; do { - fReadPage = fPageSource->PopulatePage(fTeam->at(fLastGoodTeamIdx)->GetHandleSource(), index); + fReadPage = fPageSource->PopulatePage(fTeam->at(fLastGoodTeamIdx)->GetHandleSource(), globalIndex); if (fReadPage.IsValid()) break; fLastGoodTeamIdx = (fLastGoodTeamIdx + iTeam++) % nTeam; } while (iTeam <= nTeam); - R__ASSERT(fReadPage.Contains(index)); + return fReadPage.Contains(globalIndex); } -void ROOT::Experimental::Internal::RColumn::MapPage(RClusterIndex clusterIndex) +bool ROOT::Experimental::Internal::RColumn::TryMapPage(RClusterIndex clusterIndex) { fPageSource->ReleasePage(fReadPage); // Set fReadPage to an empty page before populating it to prevent double destruction of the previously page in case @@ -143,7 +143,7 @@ void ROOT::Experimental::Internal::RColumn::MapPage(RClusterIndex clusterIndex) fLastGoodTeamIdx = (fLastGoodTeamIdx + iTeam++) % nTeam; } while (iTeam <= nTeam); - R__ASSERT(fReadPage.Contains(clusterIndex)); + return fReadPage.Contains(clusterIndex); } void ROOT::Experimental::Internal::RColumn::MergeTeams(RColumn &other) diff --git a/tree/ntuple/v7/src/RField.cxx b/tree/ntuple/v7/src/RField.cxx index df95dbc355788..5e40d2500d504 100644 --- a/tree/ntuple/v7/src/RField.cxx +++ b/tree/ntuple/v7/src/RField.cxx @@ -3470,6 +3470,19 @@ std::size_t ROOT::Experimental::RNullableField::AppendValue(const void *from) ROOT::Experimental::RClusterIndex ROOT::Experimental::RNullableField::GetItemIndex(NTupleSize_t globalIndex) { + // We ensure first that the principle column points to the non-suppressed column and + // that it has the correct page mapped + if (!fPrincipalColumn->ReadPageContains(globalIndex) && !fPrincipalColumn->TryMapPage(globalIndex)) { + for (const auto &c : fColumns) { + if (!c->TryMapPage(globalIndex)) + continue; + + fPrincipalColumn = c.get(); + break; + } + R__ASSERT(fPrincipalColumn->ReadPageContains(globalIndex)); + } + RClusterIndex nullIndex; if (fPrincipalColumn->GetType() == EColumnType::kBit) { const bool isValidItem = *fPrincipalColumn->Map(globalIndex); @@ -3482,6 +3495,22 @@ ROOT::Experimental::RClusterIndex ROOT::Experimental::RNullableField::GetItemInd } } +void ROOT::Experimental::RNullableField::ReadInClusterImpl(RClusterIndex clusterIndex, void *to) +{ + if ((fColumns.size() > 1) && !fPrincipalColumn->TryMapPage(clusterIndex)) { + for (const auto &c : fColumns) { + if (!c->TryMapPage(clusterIndex)) + continue; + + fPrincipalColumn = c.get(); + break; + } + R__ASSERT(fPrincipalColumn->ReadPageContains(clusterIndex)); + } + + ReadGlobalImpl(fPrincipalColumn->GetGlobalIndex(clusterIndex), to); +} + void ROOT::Experimental::RNullableField::AcceptVisitor(Detail::RFieldVisitor &visitor) const { visitor.VisitNullableField(*this); From 9e4933db5ae462925d14bf60e90ca42f10ba0e8c Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Wed, 31 Jul 2024 01:16:24 +0200 Subject: [PATCH 26/37] [ntuple] fix friend source for multi column representations --- .../ntuple/v7/inc/ROOT/RPageSourceFriends.hxx | 1 + tree/ntuple/v7/src/RPageSourceFriends.cxx | 37 +++++++++++-------- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/tree/ntuple/v7/inc/ROOT/RPageSourceFriends.hxx b/tree/ntuple/v7/inc/ROOT/RPageSourceFriends.hxx index 40685d92bf1cf..50a48b6b8686f 100644 --- a/tree/ntuple/v7/inc/ROOT/RPageSourceFriends.hxx +++ b/tree/ntuple/v7/inc/ROOT/RPageSourceFriends.hxx @@ -76,6 +76,7 @@ private: Detail::RNTupleMetrics fMetrics; std::vector> fSources; RIdBiMap fIdBiMap; + RIdBiMap fColumnMap; RNTupleDescriptorBuilder fBuilder; DescriptorId_t fNextId = 1; ///< 0 is reserved for the friend zero field diff --git a/tree/ntuple/v7/src/RPageSourceFriends.cxx b/tree/ntuple/v7/src/RPageSourceFriends.cxx index 6ba3363f1924e..b5f4c5fcdda8a 100644 --- a/tree/ntuple/v7/src/RPageSourceFriends.cxx +++ b/tree/ntuple/v7/src/RPageSourceFriends.cxx @@ -50,7 +50,7 @@ void ROOT::Experimental::Internal::RPageSourceFriends::AddVirtualField(const RNT AddVirtualField(originDesc, originIdx, f, virtualFieldId, f.GetFieldName()); for (const auto &c : originDesc.GetColumnIterable(originField)) { - auto physicalId = c.IsAliasColumn() ? fIdBiMap.GetVirtualId({originIdx, c.GetPhysicalId()}) : fNextId; + auto physicalId = c.IsAliasColumn() ? fColumnMap.GetVirtualId({originIdx, c.GetPhysicalId()}) : fNextId; RColumnDescriptorBuilder columnBuilder; columnBuilder.LogicalColumnId(fNextId) .PhysicalColumnId(physicalId) @@ -60,7 +60,7 @@ void ROOT::Experimental::Internal::RPageSourceFriends::AddVirtualField(const RNT .Index(c.GetIndex()) .RepresentationIndex(c.GetRepresentationIndex()); fBuilder.AddColumn(columnBuilder.MakeDescriptor().Unwrap()).ThrowOnError(); - fIdBiMap.Insert({originIdx, c.GetLogicalId()}, fNextId); + fColumnMap.Insert({originIdx, c.GetLogicalId()}, fNextId); fNextId++; } } @@ -77,6 +77,7 @@ ROOT::Experimental::RNTupleDescriptor ROOT::Experimental::Internal::RPageSourceF if (fSources[i]->GetNEntries() != fSources[0]->GetNEntries()) { fNextId = 1; fIdBiMap.Clear(); + fColumnMap.Clear(); fBuilder.Reset(); throw RException(R__FAIL("mismatch in the number of entries of friend RNTuples")); } @@ -86,6 +87,7 @@ ROOT::Experimental::RNTupleDescriptor ROOT::Experimental::Internal::RPageSourceF if (fSources[j]->GetSharedDescriptorGuard()->GetName() == descriptorGuard->GetName()) { fNextId = 1; fIdBiMap.Clear(); + fColumnMap.Clear(); fBuilder.Reset(); throw RException(R__FAIL("duplicate names of friend RNTuples")); } @@ -104,16 +106,20 @@ ROOT::Experimental::RNTupleDescriptor ROOT::Experimental::Internal::RPageSourceF RClusterDescriptorBuilder clusterBuilder; clusterBuilder.ClusterId(fNextId).FirstEntryIndex(c.GetFirstEntryIndex()).NEntries(c.GetNEntries()); for (const auto &originColumnRange : c.GetColumnRangeIterable()) { - DescriptorId_t virtualColumnId = fIdBiMap.GetVirtualId({i, originColumnRange.fPhysicalColumnId}); - - auto pageRange = c.GetPageRange(originColumnRange.fPhysicalColumnId).Clone(); - pageRange.fPhysicalColumnId = virtualColumnId; - - auto firstElementIndex = originColumnRange.fFirstElementIndex; - auto compressionSettings = originColumnRange.fCompressionSettings; - - clusterBuilder.CommitColumnRange(virtualColumnId, firstElementIndex, compressionSettings, pageRange); + DescriptorId_t virtualColumnId = fColumnMap.GetVirtualId({i, originColumnRange.fPhysicalColumnId}); + if (originColumnRange.fIsSuppressed) { + clusterBuilder.MarkSuppressedColumnRange(virtualColumnId); + } else { + auto pageRange = c.GetPageRange(originColumnRange.fPhysicalColumnId).Clone(); + pageRange.fPhysicalColumnId = virtualColumnId; + + auto firstElementIndex = originColumnRange.fFirstElementIndex; + auto compressionSettings = originColumnRange.fCompressionSettings; + + clusterBuilder.CommitColumnRange(virtualColumnId, firstElementIndex, compressionSettings, pageRange); + } } + clusterBuilder.CommitSuppressedColumnRanges(fBuilder.GetDescriptor()).ThrowOnError(); fBuilder.AddCluster(clusterBuilder.MoveDescriptor().Unwrap()); fIdBiMap.Insert({i, c.GetId()}, fNextId); fNextId++; @@ -133,6 +139,7 @@ ROOT::Experimental::Internal::RPageSourceFriends::CloneImpl() const cloneSources.emplace_back(f->Clone()); auto clone = std::make_unique(fNTupleName, cloneSources); clone->fIdBiMap = fIdBiMap; + clone->fColumnMap = fColumnMap; return clone; } @@ -147,7 +154,7 @@ ROOT::Experimental::Internal::RPageSourceFriends::AddColumn(DescriptorId_t field void ROOT::Experimental::Internal::RPageSourceFriends::DropColumn(ColumnHandle_t columnHandle) { RPageSource::DropColumn(columnHandle); - auto originColumnId = fIdBiMap.GetOriginId(columnHandle.fPhysicalId); + auto originColumnId = fColumnMap.GetOriginId(columnHandle.fPhysicalId); columnHandle.fPhysicalId = originColumnId.fId; fSources[originColumnId.fSourceIdx]->DropColumn(columnHandle); } @@ -156,7 +163,7 @@ ROOT::Experimental::Internal::RPage ROOT::Experimental::Internal::RPageSourceFriends::PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex) { auto virtualColumnId = columnHandle.fPhysicalId; - auto originColumnId = fIdBiMap.GetOriginId(virtualColumnId); + auto originColumnId = fColumnMap.GetOriginId(virtualColumnId); columnHandle.fPhysicalId = originColumnId.fId; auto page = fSources[originColumnId.fSourceIdx]->PopulatePage(columnHandle, globalIndex); @@ -174,7 +181,7 @@ ROOT::Experimental::Internal::RPage ROOT::Experimental::Internal::RPageSourceFriends::PopulatePage(ColumnHandle_t columnHandle, RClusterIndex clusterIndex) { auto virtualColumnId = columnHandle.fPhysicalId; - auto originColumnId = fIdBiMap.GetOriginId(virtualColumnId); + auto originColumnId = fColumnMap.GetOriginId(virtualColumnId); RClusterIndex originClusterIndex(fIdBiMap.GetOriginId(clusterIndex.GetClusterId()).fId, clusterIndex.GetIndex()); columnHandle.fPhysicalId = originColumnId.fId; @@ -191,7 +198,7 @@ void ROOT::Experimental::Internal::RPageSourceFriends::LoadSealedPage(Descriptor RClusterIndex clusterIndex, RSealedPage &sealedPage) { - auto originColumnId = fIdBiMap.GetOriginId(physicalColumnId); + auto originColumnId = fColumnMap.GetOriginId(physicalColumnId); RClusterIndex originClusterIndex(fIdBiMap.GetOriginId(clusterIndex.GetClusterId()).fId, clusterIndex.GetIndex()); fSources[originColumnId.fSourceIdx]->LoadSealedPage(physicalColumnId, originClusterIndex, sealedPage); From 35a5b6dd8484adb77da5a3ef2d08cb7003ac3c47 Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Wed, 31 Jul 2024 01:10:19 +0200 Subject: [PATCH 27/37] [ntuple] add internal helper to swap column representation on write --- tree/ntuple/v7/inc/ROOT/RField.hxx | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tree/ntuple/v7/inc/ROOT/RField.hxx b/tree/ntuple/v7/inc/ROOT/RField.hxx index 30ec024862ecb..6024433a11edb 100644 --- a/tree/ntuple/v7/inc/ROOT/RField.hxx +++ b/tree/ntuple/v7/inc/ROOT/RField.hxx @@ -69,6 +69,7 @@ class REntry; namespace Internal { struct RFieldCallbackInjector; +struct RFieldRepresentationModifier; class RPageSink; class RPageSource; // TODO(jblomer): find a better way to not have these three methods in the RFieldBase public API @@ -99,6 +100,7 @@ This is and can only be partially enforced through C++. class RFieldBase { friend class ROOT::Experimental::RCollectionField; // to move the fields from the collection model friend struct ROOT::Experimental::Internal::RFieldCallbackInjector; // used for unit tests + friend struct ROOT::Experimental::Internal::RFieldRepresentationModifier; // used for unit tests friend void Internal::CallCommitClusterOnField(RFieldBase &); friend void Internal::CallConnectPageSinkOnField(RFieldBase &, Internal::RPageSink &, NTupleSize_t); friend void Internal::CallConnectPageSourceOnField(RFieldBase &, Internal::RPageSource &); @@ -792,6 +794,24 @@ public: virtual void AcceptVisitor(Detail::RFieldVisitor &visitor) const; }; // class RFieldBase +namespace Internal { +// At some point, RFieldBase::OnClusterCommit() may allow for a user-defined callback to change the +// column representation. For now, we inject this for testing and internal use only. +struct RFieldRepresentationModifier { + static void SwapPrimayColumnRepresentation(RFieldBase &field, std::uint16_t newRepresentationIdx) + { + R__ASSERT(newRepresentationIdx != 0); + R__ASSERT(newRepresentationIdx < field.fColumnRepresentatives.size()); + const auto N = field.fColumnRepresentatives[0].get().size(); + std::swap(field.fColumnRepresentatives[0], field.fColumnRepresentatives[newRepresentationIdx]); + for (std::size_t i = 0; i < N; ++i) { + std::swap(field.fColumns[i], field.fColumns[i + newRepresentationIdx * N]); + } + field.fPrincipalColumn = field.fColumns[0].get(); + } +}; +} // namespace Internal + /// The container field for an ntuple model, which itself has no physical representation. /// Therefore, the zero field must not be connected to a page source or sink. class RFieldZero final : public RFieldBase { From 3091308b0053e309a3c76a6e484e4ef26ccd8d09 Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Sun, 28 Jul 2024 15:20:16 +0200 Subject: [PATCH 28/37] [ntuple] add tests for multi column representation --- tree/ntuple/v7/test/CMakeLists.txt | 1 + tree/ntuple/v7/test/ntuple_multi_column.cxx | 378 ++++++++++++++++++++ 2 files changed, 379 insertions(+) create mode 100644 tree/ntuple/v7/test/ntuple_multi_column.cxx diff --git a/tree/ntuple/v7/test/CMakeLists.txt b/tree/ntuple/v7/test/CMakeLists.txt index 5b647c70405f4..72ab3293d2386 100644 --- a/tree/ntuple/v7/test/CMakeLists.txt +++ b/tree/ntuple/v7/test/CMakeLists.txt @@ -38,6 +38,7 @@ ROOT_ADD_GTEST(ntuple_friends ntuple_friends.cxx LIBRARIES ROOTNTuple CustomStru ROOT_ADD_GTEST(ntuple_merger ntuple_merger.cxx LIBRARIES ROOTNTuple CustomStruct ZLIB::ZLIB) ROOT_ADD_GTEST(ntuple_metrics ntuple_metrics.cxx LIBRARIES ROOTNTuple CustomStruct) ROOT_ADD_GTEST(ntuple_model ntuple_model.cxx LIBRARIES ROOTNTuple CustomStruct) +ROOT_ADD_GTEST(ntuple_multi_column ntuple_multi_column.cxx LIBRARIES ROOTNTuple) ROOT_ADD_GTEST(ntuple_packing ntuple_packing.cxx LIBRARIES ROOTNTuple CustomStruct) ROOT_ADD_GTEST(ntuple_pages ntuple_pages.cxx LIBRARIES ROOTNTuple CustomStruct) ROOT_ADD_GTEST(ntuple_print ntuple_print.cxx LIBRARIES ROOTNTuple CustomStruct) diff --git a/tree/ntuple/v7/test/ntuple_multi_column.cxx b/tree/ntuple/v7/test/ntuple_multi_column.cxx new file mode 100644 index 0000000000000..9b6f520ff09ee --- /dev/null +++ b/tree/ntuple/v7/test/ntuple_multi_column.cxx @@ -0,0 +1,378 @@ +#include "ntuple_test.hxx" + +TEST(RNTuple, MultiColumnRepresentationSimple) +{ + using ROOT::Experimental::kUnknownCompressionSettings; + FileRaii fileGuard("test_ntuple_multi_column_representation_simple.root"); + + { + auto model = RNTupleModel::Create(); + auto fldPx = RFieldBase::Create("px", "float").Unwrap(); + fldPx->SetColumnRepresentatives({{EColumnType::kReal32}, {EColumnType::kReal16}}); + model->AddField(std::move(fldPx)); + auto ptrPx = model->GetDefaultEntry().GetPtr("px"); + RNTupleWriteOptions options; + options.SetCompression(0); + auto writer = RNTupleWriter::Recreate(std::move(model), "ntpl", fileGuard.GetPath(), options); + *ptrPx = 1.0; + writer->Fill(); + writer->CommitCluster(); + ROOT::Experimental::Internal::RFieldRepresentationModifier::SwapPrimayColumnRepresentation( + const_cast(writer->GetModel().GetField("px")), 1); + *ptrPx = 2.0; + writer->Fill(); + writer->CommitCluster(); + ROOT::Experimental::Internal::RFieldRepresentationModifier::SwapPrimayColumnRepresentation( + const_cast(writer->GetModel().GetField("px")), 1); + *ptrPx = 3.0; + writer->Fill(); + } + + auto reader = RNTupleReader::Open("ntpl", fileGuard.GetPath()); + EXPECT_EQ(3u, reader->GetModel().GetField("px").GetNElements()); + + const auto &desc = reader->GetDescriptor(); + EXPECT_EQ(3u, desc.GetNClusters()); + + const auto &fieldDesc = desc.GetFieldDescriptor(desc.FindFieldId("px")); + EXPECT_EQ(1u, fieldDesc.GetColumnCardinality()); + EXPECT_EQ(2u, fieldDesc.GetLogicalColumnIds().size()); + + const auto &colDesc32 = desc.GetColumnDescriptor(fieldDesc.GetLogicalColumnIds()[0]); + const auto &colDesc16 = desc.GetColumnDescriptor(fieldDesc.GetLogicalColumnIds()[1]); + + EXPECT_EQ(EColumnType::kReal32, colDesc32.GetType()); + EXPECT_EQ(0u, colDesc32.GetRepresentationIndex()); + EXPECT_EQ(EColumnType::kReal16, colDesc16.GetType()); + EXPECT_EQ(1u, colDesc16.GetRepresentationIndex()); + + const auto &clusterDesc0 = desc.GetClusterDescriptor(0); + EXPECT_FALSE(clusterDesc0.GetColumnRange(colDesc32.GetPhysicalId()).fIsSuppressed); + EXPECT_EQ(1u, clusterDesc0.GetColumnRange(colDesc32.GetPhysicalId()).fNElements); + EXPECT_EQ(0u, clusterDesc0.GetColumnRange(colDesc32.GetPhysicalId()).fFirstElementIndex); + EXPECT_TRUE(clusterDesc0.GetColumnRange(colDesc16.GetPhysicalId()).fIsSuppressed); + EXPECT_EQ(1u, clusterDesc0.GetColumnRange(colDesc16.GetPhysicalId()).fNElements); + EXPECT_EQ(0u, clusterDesc0.GetColumnRange(colDesc16.GetPhysicalId()).fFirstElementIndex); + EXPECT_EQ(kUnknownCompressionSettings, clusterDesc0.GetColumnRange(colDesc16.GetPhysicalId()).fCompressionSettings); + + const auto &clusterDesc1 = desc.GetClusterDescriptor(1); + EXPECT_FALSE(clusterDesc1.GetColumnRange(colDesc16.GetPhysicalId()).fIsSuppressed); + EXPECT_EQ(1u, clusterDesc1.GetColumnRange(colDesc16.GetPhysicalId()).fNElements); + EXPECT_EQ(1u, clusterDesc1.GetColumnRange(colDesc16.GetPhysicalId()).fFirstElementIndex); + EXPECT_TRUE(clusterDesc1.GetColumnRange(colDesc32.GetPhysicalId()).fIsSuppressed); + EXPECT_EQ(1u, clusterDesc1.GetColumnRange(colDesc32.GetPhysicalId()).fNElements); + EXPECT_EQ(1u, clusterDesc1.GetColumnRange(colDesc32.GetPhysicalId()).fFirstElementIndex); + EXPECT_EQ(kUnknownCompressionSettings, clusterDesc1.GetColumnRange(colDesc32.GetPhysicalId()).fCompressionSettings); + + const auto &clusterDesc2 = desc.GetClusterDescriptor(2); + EXPECT_FALSE(clusterDesc2.GetColumnRange(colDesc32.GetPhysicalId()).fIsSuppressed); + EXPECT_EQ(1u, clusterDesc2.GetColumnRange(colDesc32.GetPhysicalId()).fNElements); + EXPECT_EQ(2u, clusterDesc2.GetColumnRange(colDesc32.GetPhysicalId()).fFirstElementIndex); + EXPECT_TRUE(clusterDesc2.GetColumnRange(colDesc16.GetPhysicalId()).fIsSuppressed); + EXPECT_EQ(1u, clusterDesc2.GetColumnRange(colDesc16.GetPhysicalId()).fNElements); + EXPECT_EQ(2u, clusterDesc2.GetColumnRange(colDesc16.GetPhysicalId()).fFirstElementIndex); + EXPECT_EQ(kUnknownCompressionSettings, clusterDesc2.GetColumnRange(colDesc16.GetPhysicalId()).fCompressionSettings); + + auto ptrPx = reader->GetModel().GetDefaultEntry().GetPtr("px"); + reader->LoadEntry(0); + EXPECT_FLOAT_EQ(1.0, *ptrPx); + reader->LoadEntry(1); + EXPECT_FLOAT_EQ(2.0, *ptrPx); + reader->LoadEntry(2); + EXPECT_FLOAT_EQ(3.0, *ptrPx); + + auto viewPx = reader->GetView("px"); + EXPECT_FLOAT_EQ(1.0, viewPx(0)); + EXPECT_FLOAT_EQ(2.0, viewPx(1)); + EXPECT_FLOAT_EQ(3.0, viewPx(2)); + + std::ostringstream osDetails; + reader->PrintInfo(ROOT::Experimental::ENTupleInfo::kStorageDetails, osDetails); + const std::string reference = std::string("") + "============================================================\n" + "NTUPLE: ntpl\n" + "Compression: 0\n" + "------------------------------------------------------------\n" + " # Entries: 3\n" + " # Fields: 2\n" + " # Columns: 2\n" + " # Alias Columns: 0\n" + " # Pages: 3\n" + " # Clusters: 3\n" + " Size on storage: .* B\n" + " Compression rate: .*\n" + " Header size: .* B\n" + " Footer size: .* B\n" + " Meta-data / data: .*\n" + "------------------------------------------------------------\n" + "CLUSTER DETAILS\n" + "------------------------------------------------------------\n" + " # 0 Entry range: .0..0. -- 1\n" + " # Pages: 1\n" + " Size on storage: 4 B\n" + " Compression: 1.00\n" + " # 1 Entry range: .1..1. -- 1\n" + " # Pages: 1\n" + " Size on storage: 2 B\n" + " Compression: 2.00\n" + " # 2 Entry range: .2..2. -- 1\n" + " # Pages: 1\n" + " Size on storage: 4 B\n" + " Compression: 1.00\n" + "------------------------------------------------------------\n" + "COLUMN DETAILS\n" + "------------------------------------------------------------\n" + " px .#0. -- Real32 .id:0.\n" + " # Elements: 2\n" + " # Pages: 2\n" + " Avg elements / page: 1\n" + " Avg page size: 4 B\n" + " Size on storage: 8 B\n" + " Compression: 1.00\n" + "............................................................\n" + " px .#0 / R.1. -- Real16 .id:1.\n" + " # Elements: 1\n" + " # Pages: 1\n" + " Avg elements / page: 1\n" + " Avg page size: 2 B\n" + " Size on storage: 2 B\n" + " Compression: 2.00\n" + "............................................................\n"; + EXPECT_THAT(osDetails.str(), testing::MatchesRegex(reference)); +} + +TEST(RNTuple, MultiColumnRepresentationString) +{ + using ROOT::Experimental::kUnknownCompressionSettings; + FileRaii fileGuard("test_ntuple_multi_column_representation_string.root"); + + { + auto model = RNTupleModel::Create(); + auto fldStr = RFieldBase::Create("str", "std::string").Unwrap(); + fldStr->SetColumnRepresentatives( + {{EColumnType::kIndex32, EColumnType::kChar}, {EColumnType::kSplitIndex64, EColumnType::kChar}}); + model->AddField(std::move(fldStr)); + auto ptrStr = model->GetDefaultEntry().GetPtr("str"); + auto writer = RNTupleWriter::Recreate(std::move(model), "ntpl", fileGuard.GetPath()); + *ptrStr = "abc"; + writer->Fill(); + writer->CommitCluster(); + ROOT::Experimental::Internal::RFieldRepresentationModifier::SwapPrimayColumnRepresentation( + const_cast(writer->GetModel().GetField("str")), 1); + ptrStr->clear(); + writer->Fill(); + writer->CommitCluster(); + ROOT::Experimental::Internal::RFieldRepresentationModifier::SwapPrimayColumnRepresentation( + const_cast(writer->GetModel().GetField("str")), 1); + writer->Fill(); + writer->CommitCluster(); + ROOT::Experimental::Internal::RFieldRepresentationModifier::SwapPrimayColumnRepresentation( + const_cast(writer->GetModel().GetField("str")), 1); + *ptrStr = "x"; + writer->Fill(); + } + + auto reader = RNTupleReader::Open("ntpl", fileGuard.GetPath()); + EXPECT_EQ(4u, reader->GetNEntries()); + auto ptrStr = reader->GetModel().GetDefaultEntry().GetPtr("str"); + reader->LoadEntry(0); + EXPECT_EQ("abc", *ptrStr); + reader->LoadEntry(1); + EXPECT_TRUE(ptrStr->empty()); + reader->LoadEntry(2); + EXPECT_TRUE(ptrStr->empty()); + reader->LoadEntry(3); + EXPECT_EQ("x", *ptrStr); +} + +TEST(RNTuple, MultiColumnRepresentationVector) +{ + using ROOT::Experimental::kUnknownCompressionSettings; + FileRaii fileGuard("test_ntuple_multi_column_representation_vector.root"); + + { + auto model = RNTupleModel::Create(); + auto fldVec = RFieldBase::Create("vec", "std::vector").Unwrap(); + fldVec->SetColumnRepresentatives({{EColumnType::kIndex32}, {EColumnType::kSplitIndex64}}); + model->AddField(std::move(fldVec)); + auto ptrVec = model->GetDefaultEntry().GetPtr>("vec"); + auto writer = RNTupleWriter::Recreate(std::move(model), "ntpl", fileGuard.GetPath()); + writer->Fill(); + writer->CommitCluster(); + ROOT::Experimental::Internal::RFieldRepresentationModifier::SwapPrimayColumnRepresentation( + const_cast(writer->GetModel().GetField("vec")), 1); + ptrVec->push_back(1.0); + writer->Fill(); + writer->CommitCluster(); + ROOT::Experimental::Internal::RFieldRepresentationModifier::SwapPrimayColumnRepresentation( + const_cast(writer->GetModel().GetField("vec")), 1); + ptrVec->clear(); + writer->Fill(); + writer->CommitCluster(); + ROOT::Experimental::Internal::RFieldRepresentationModifier::SwapPrimayColumnRepresentation( + const_cast(writer->GetModel().GetField("vec")), 1); + ptrVec->push_back(2.0); + ptrVec->push_back(3.0); + writer->Fill(); + } + + auto reader = RNTupleReader::Open("ntpl", fileGuard.GetPath()); + EXPECT_EQ(4u, reader->GetNEntries()); + auto ptrVec = reader->GetModel().GetDefaultEntry().GetPtr>("vec"); + reader->LoadEntry(0); + EXPECT_TRUE(ptrVec->empty()); + reader->LoadEntry(1); + EXPECT_EQ(1u, ptrVec->size()); + EXPECT_FLOAT_EQ(1.0, ptrVec->at(0)); + reader->LoadEntry(2); + EXPECT_TRUE(ptrVec->empty()); + reader->LoadEntry(3); + EXPECT_EQ(2u, ptrVec->size()); + EXPECT_FLOAT_EQ(2.0, ptrVec->at(0)); + EXPECT_FLOAT_EQ(3.0, ptrVec->at(1)); +} + +TEST(RNTuple, MultiColumnRepresentationNullable) +{ + FileRaii fileGuard("test_ntuple_multi_column_representation_nullable.root"); + + { + auto model = RNTupleModel::Create(); + auto fldScalar = RFieldBase::Create("scalar", "std::optional").Unwrap(); + auto fldVector = RFieldBase::Create("vector", "std::vector>").Unwrap(); + fldScalar->SetColumnRepresentatives({{EColumnType::kBit}, {EColumnType::kSplitIndex64}}); + fldVector->GetSubFields()[0]->SetColumnRepresentatives({{EColumnType::kSplitIndex64}, {EColumnType::kBit}}); + model->AddField(std::move(fldScalar)); + model->AddField(std::move(fldVector)); + auto ptrScalar = model->GetDefaultEntry().GetPtr>("scalar"); + auto ptrVector = model->GetDefaultEntry().GetPtr>>("vector"); + auto writer = RNTupleWriter::Recreate(std::move(model), "ntpl", fileGuard.GetPath()); + *ptrScalar = 1.0; + ptrVector->push_back(13.0); + writer->Fill(); + writer->CommitCluster(); + ROOT::Experimental::Internal::RFieldRepresentationModifier::SwapPrimayColumnRepresentation( + const_cast(writer->GetModel().GetField("scalar")), 1); + ROOT::Experimental::Internal::RFieldRepresentationModifier::SwapPrimayColumnRepresentation( + const_cast(writer->GetModel().GetField("vector._0")), 1); + ptrScalar->reset(); + ptrVector->clear(); + ptrVector->push_back(std::optional()); + writer->Fill(); + writer->CommitCluster(); + ROOT::Experimental::Internal::RFieldRepresentationModifier::SwapPrimayColumnRepresentation( + const_cast(writer->GetModel().GetField("scalar")), 1); + ROOT::Experimental::Internal::RFieldRepresentationModifier::SwapPrimayColumnRepresentation( + const_cast(writer->GetModel().GetField("vector._0")), 1); + *ptrScalar = 3.0; + ptrVector->clear(); + ptrVector->push_back(15.0); + ptrVector->push_back(17.0); + writer->Fill(); + writer->CommitCluster(); + ptrScalar->reset(); + ptrVector->clear(); + writer->Fill(); + } + + auto reader = RNTupleReader::Open("ntpl", fileGuard.GetPath()); + EXPECT_EQ(4u, reader->GetNEntries()); + auto ptrScalar = reader->GetModel().GetDefaultEntry().GetPtr>("scalar"); + auto ptrVector = reader->GetModel().GetDefaultEntry().GetPtr>>("vector"); + reader->LoadEntry(0); + EXPECT_FLOAT_EQ(1.0, ptrScalar->value()); + EXPECT_FLOAT_EQ(1u, ptrVector->size()); + EXPECT_FLOAT_EQ(13.0, ptrVector->at(0).value()); + reader->LoadEntry(1); + EXPECT_FALSE(ptrScalar->has_value()); + EXPECT_FLOAT_EQ(1u, ptrVector->size()); + EXPECT_FALSE(ptrVector->at(0).has_value()); + reader->LoadEntry(2); + EXPECT_FLOAT_EQ(3.0, ptrScalar->value()); + EXPECT_FLOAT_EQ(2u, ptrVector->size()); + EXPECT_FLOAT_EQ(15.0, ptrVector->at(0).value()); + EXPECT_FLOAT_EQ(17.0, ptrVector->at(1).value()); + reader->LoadEntry(3); + EXPECT_FALSE(ptrScalar->has_value()); + EXPECT_TRUE(ptrVector->empty()); +} + +TEST(RNTuple, MultiColumnRepresentationBulk) +{ + FileRaii fileGuard("test_ntuple_multi_column_representation_bulk.root"); + + { + auto model = RNTupleModel::Create(); + auto fldPx = RFieldBase::Create("px", "float").Unwrap(); + fldPx->SetColumnRepresentatives({{EColumnType::kReal32}, {EColumnType::kReal16}}); + model->AddField(std::move(fldPx)); + auto ptrPx = model->GetDefaultEntry().GetPtr("px"); + auto writer = RNTupleWriter::Recreate(std::move(model), "ntpl", fileGuard.GetPath()); + *ptrPx = 1.0; + writer->Fill(); + writer->CommitCluster(); + ROOT::Experimental::Internal::RFieldRepresentationModifier::SwapPrimayColumnRepresentation( + const_cast(writer->GetModel().GetField("px")), 1); + *ptrPx = 2.0; + writer->Fill(); + } + + auto reader = RNTupleReader::Open("ntpl", fileGuard.GetPath()); + RFieldBase::RBulk bulk = reader->GetModel().CreateBulk("px"); + + auto mask = std::make_unique(1); + mask[0] = true; + auto arr = static_cast(bulk.ReadBulk(RClusterIndex(0, 0), mask.get(), 1)); + EXPECT_FLOAT_EQ(1.0, arr[0]); + + arr = static_cast(bulk.ReadBulk(RClusterIndex(1, 0), mask.get(), 1)); + EXPECT_FLOAT_EQ(2.0, arr[0]); +} + +TEST(RNTuple, MultiColumnRepresentationFriends) +{ + FileRaii fileGuard1("test_ntuple_multi_column_representation_friend1.root"); + FileRaii fileGuard2("test_ntuple_multi_column_representation_friend2.root"); + + auto model1 = RNTupleModel::Create(); + auto fldPt = RFieldBase::Create("pt", "float").Unwrap(); + fldPt->SetColumnRepresentatives({{EColumnType::kReal32}, {EColumnType::kReal16}}); + model1->AddField(std::move(fldPt)); + auto ptrPt = model1->GetDefaultEntry().GetPtr("pt"); + + auto model2 = RNTupleModel::Create(); + auto fldEta = RFieldBase::Create("eta", "float").Unwrap(); + fldEta->SetColumnRepresentatives({{EColumnType::kReal16}, {EColumnType::kReal32}}); + model2->AddField(std::move(fldEta)); + auto ptrEta = model2->GetDefaultEntry().GetPtr("eta"); + + { + auto writer = RNTupleWriter::Recreate(std::move(model1), "ntpl1", fileGuard1.GetPath()); + *ptrPt = 1.0; + writer->Fill(); + writer->CommitCluster(); + ROOT::Experimental::Internal::RFieldRepresentationModifier::SwapPrimayColumnRepresentation( + const_cast(writer->GetModel().GetField("pt")), 1); + *ptrPt = 2.0; + writer->Fill(); + } + { + auto writer = RNTupleWriter::Recreate(std::move(model2), "ntpl2", fileGuard2.GetPath()); + *ptrEta = 3.0; + writer->Fill(); + writer->CommitCluster(); + ROOT::Experimental::Internal::RFieldRepresentationModifier::SwapPrimayColumnRepresentation( + const_cast(writer->GetModel().GetField("eta")), 1); + *ptrEta = 4.0; + writer->Fill(); + } + + std::vector friends{{"ntpl1", fileGuard1.GetPath()}, {"ntpl2", fileGuard2.GetPath()}}; + auto reader = RNTupleReader::OpenFriends(friends); + EXPECT_EQ(2u, reader->GetNEntries()); + auto viewPt = reader->GetView("ntpl1.pt"); + EXPECT_FLOAT_EQ(1.0, viewPt(0)); + EXPECT_FLOAT_EQ(2.0, viewPt(1)); + auto viewEta = reader->GetView("ntpl2.eta"); + EXPECT_FLOAT_EQ(3.0, viewEta(0)); + EXPECT_FLOAT_EQ(4.0, viewEta(1)); +} From 26e382004f67cf2cb85ffd6f8e2a4c2b64ae5736 Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Tue, 30 Jul 2024 16:09:43 +0200 Subject: [PATCH 29/37] [ntuple] skip suppressed column ranges in inspector --- tree/ntupleutil/v7/src/RNTupleInspector.cxx | 3 +++ tree/ntupleutil/v7/test/ntuple_inspector.cxx | 25 ++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/tree/ntupleutil/v7/src/RNTupleInspector.cxx b/tree/ntupleutil/v7/src/RNTupleInspector.cxx index f9dc60c53331e..df3fb7ae21dfa 100644 --- a/tree/ntupleutil/v7/src/RNTupleInspector.cxx +++ b/tree/ntupleutil/v7/src/RNTupleInspector.cxx @@ -64,6 +64,9 @@ void ROOT::Experimental::RNTupleInspector::CollectColumnInfo() } auto columnRange = clusterDescriptor.GetColumnRange(colId); + if (columnRange.fIsSuppressed) + continue; + nElems += columnRange.fNElements; if (fCompressionSettings == -1) { diff --git a/tree/ntupleutil/v7/test/ntuple_inspector.cxx b/tree/ntupleutil/v7/test/ntuple_inspector.cxx index 7c36efcb279f1..cbf65097d6b52 100644 --- a/tree/ntupleutil/v7/test/ntuple_inspector.cxx +++ b/tree/ntupleutil/v7/test/ntuple_inspector.cxx @@ -760,3 +760,28 @@ TEST(RNTupleInspector, FieldsByName) EXPECT_EQ("std::int32_t", inspector->GetFieldTreeInspector(fieldId).GetDescriptor().GetTypeName()); } } + +TEST(RNTupleInspector, MultiColumnRepresentations) +{ + FileRaii fileGuard("test_ntuple_inspector_multi_column_representations.root"); + { + auto model = RNTupleModel::Create(); + auto fldPx = RFieldBase::Create("px", "float").Unwrap(); + fldPx->SetColumnRepresentatives({{EColumnType::kReal32}, {EColumnType::kReal16}}); + model->AddField(std::move(fldPx)); + auto writer = RNTupleWriter::Recreate(std::move(model), "ntpl", fileGuard.GetPath()); + writer->Fill(); + writer->CommitCluster(); + ROOT::Experimental::Internal::RFieldRepresentationModifier::SwapPrimayColumnRepresentation( + const_cast(writer->GetModel().GetField("px")), 1); + writer->Fill(); + } + + auto inspector = RNTupleInspector::Create("ntpl", fileGuard.GetPath()); + auto px0Inspector = inspector->GetColumnInspector(0); + auto px1Inspector = inspector->GetColumnInspector(1); + EXPECT_EQ(EColumnType::kReal32, px0Inspector.GetType()); + EXPECT_EQ(1u, px0Inspector.GetNElements()); + EXPECT_EQ(EColumnType::kReal16, px1Inspector.GetType()); + EXPECT_EQ(1u, px1Inspector.GetNElements()); +} From a3bf33c6a4012ee3f37eeeda545f1f22284667ce Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Wed, 31 Jul 2024 23:48:37 +0200 Subject: [PATCH 30/37] [ntuple] minor improvement in descriptor printer --- tree/ntuple/v7/src/RNTupleDescriptorFmt.cxx | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tree/ntuple/v7/src/RNTupleDescriptorFmt.cxx b/tree/ntuple/v7/src/RNTupleDescriptorFmt.cxx index b4ad5d7213ed0..693adb2066092 100644 --- a/tree/ntuple/v7/src/RNTupleDescriptorFmt.cxx +++ b/tree/ntuple/v7/src/RNTupleDescriptorFmt.cxx @@ -54,11 +54,9 @@ struct ColumnInfo { bool operator<(const ColumnInfo &other) const { if (fFieldName == other.fFieldName) { - if (fRepresentationIndex < other.fRepresentationIndex) - return true; - else if (fRepresentationIndex > other.fRepresentationIndex) - return false; - return fColumnIndex < other.fColumnIndex; + if (fRepresentationIndex == other.fRepresentationIndex) + return fColumnIndex < other.fColumnIndex; + return fRepresentationIndex < other.fRepresentationIndex; } return fFieldName < other.fFieldName; } From 661a48633cf79749c72c325202aec91ac665c2de Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Wed, 31 Jul 2024 23:42:54 +0200 Subject: [PATCH 31/37] [ntuple] adjust column handling in fields Renames fColumns to fAvailableColumns, which keeps the promised invariant of storing columns in order of representation index and column index. Instead of swapping owned columns when the column representation changes during write (RFieldRepresentationModifier), we now instead just move the fPrincipalColumn non-owning pointer. To accomodate for fields with two columns per representation (string, unsplit), we add a non-owning fAuxiliaryColumn that works accordingly to fPrincipalColumn. --- tree/ntuple/v7/inc/ROOT/RField.hxx | 53 ++++++++----- tree/ntuple/v7/src/RField.cxx | 79 ++++++++++---------- tree/ntuple/v7/test/ntuple_multi_column.cxx | 40 +++++----- tree/ntupleutil/v7/test/ntuple_inspector.cxx | 2 +- 4 files changed, 96 insertions(+), 78 deletions(-) diff --git a/tree/ntuple/v7/inc/ROOT/RField.hxx b/tree/ntuple/v7/inc/ROOT/RField.hxx index 6024433a11edb..09973ed725cfc 100644 --- a/tree/ntuple/v7/inc/ROOT/RField.hxx +++ b/tree/ntuple/v7/inc/ROOT/RField.hxx @@ -402,13 +402,19 @@ protected: std::vector> fSubFields; /// Sub fields point to their mother field RFieldBase* fParent; - /// Points to the first element of fColumns. All fields that have columns have a distinct main column. - /// For simple fields (float, int, ...), the principal column corresponds to the field type. For collection fields - /// except std::array, the main column is the offset field. Class fields have no column of their own. - Internal::RColumn *fPrincipalColumn; + /// All fields that have columns have a distinct main column. E.g., for simple fields (float, int, ...), the + /// principal column corresponds to the field type. For collection fields except fixed-sized arrays, + /// the main column is the offset field. Class fields have no column of their own. + /// When reading, points to any column of the column team of the active representation. Usually, this is just + /// the first column, except for the nullable field. + /// When writing, points to the first column index of the currently active (not suppressed) column representation. + Internal::RColumn *fPrincipalColumn = nullptr; + /// Some fields have a second column in its column representation. In this case, fAuxiliaryColumn points into + /// fAvailableColumns to the column that immediately follows the column fPrincipalColumn points to. + Internal::RColumn *fAuxiliaryColumn = nullptr; /// The columns are connected either to a sink or to a source (not to both); they are owned by the field. /// Contains all columns of all representations in order of representation and column index. - std::vector> fColumns; + std::vector> fAvailableColumns; /// Properties of the type that allow for optimizations of collections of that type int fTraits = 0; /// A typedef or using name that was used when creating the field @@ -432,8 +438,19 @@ protected: void GenerateColumnsImpl(const ColumnRepresentation_t &representation, std::uint16_t representationIndex) { assert(ColumnIndexT < representation.size()); - fColumns.emplace_back( + fAvailableColumns.emplace_back( Internal::RColumn::Create(representation[ColumnIndexT], ColumnIndexT, representationIndex)); + + // Initially, the first two columns become the active column representation + if (representationIndex == 0 && !fPrincipalColumn) { + fPrincipalColumn = fAvailableColumns.back().get(); + } else if (representationIndex == 0 && !fAuxiliaryColumn) { + fAuxiliaryColumn = fAvailableColumns.back().get(); + } else { + // We currently have no fields with more than 2 columns in its column representation + R__ASSERT(representationIndex > 0); + } + if constexpr (sizeof...(TailTs)) GenerateColumnsImpl(representation, representationIndex); } @@ -443,11 +460,11 @@ protected: void GenerateColumnsImpl() { if (fColumnRepresentatives.empty()) { - fColumns.reserve(sizeof...(ColumnCppTs)); + fAvailableColumns.reserve(sizeof...(ColumnCppTs)); GenerateColumnsImpl<0, ColumnCppTs...>(GetColumnRepresentations().GetSerializationDefault(), 0); } else { const auto N = fColumnRepresentatives.size(); - fColumns.reserve(N * sizeof...(ColumnCppTs)); + fAvailableColumns.reserve(N * sizeof...(ColumnCppTs)); for (unsigned i = 0; i < N; ++i) { GenerateColumnsImpl<0, ColumnCppTs...>(fColumnRepresentatives[i].get(), i); } @@ -467,7 +484,8 @@ protected: fColumnRepresentatives.emplace_back(onDiskTypes); if (representationIndex > 0) { for (std::size_t i = 0; i < sizeof...(ColumnCppTs); ++i) { - fColumns[i]->MergeTeams(*fColumns[representationIndex * sizeof...(ColumnCppTs) + i].get()); + fAvailableColumns[i]->MergeTeams( + *fAvailableColumns[representationIndex * sizeof...(ColumnCppTs) + i].get()); } } representationIndex++; @@ -798,16 +816,17 @@ namespace Internal { // At some point, RFieldBase::OnClusterCommit() may allow for a user-defined callback to change the // column representation. For now, we inject this for testing and internal use only. struct RFieldRepresentationModifier { - static void SwapPrimayColumnRepresentation(RFieldBase &field, std::uint16_t newRepresentationIdx) + static void SetPrimaryColumnRepresentation(RFieldBase &field, std::uint16_t newRepresentationIdx) { - R__ASSERT(newRepresentationIdx != 0); R__ASSERT(newRepresentationIdx < field.fColumnRepresentatives.size()); const auto N = field.fColumnRepresentatives[0].get().size(); - std::swap(field.fColumnRepresentatives[0], field.fColumnRepresentatives[newRepresentationIdx]); - for (std::size_t i = 0; i < N; ++i) { - std::swap(field.fColumns[i], field.fColumns[i + newRepresentationIdx * N]); + R__ASSERT(N >= 1 && N <= 2); + R__ASSERT(field.fPrincipalColumn); + field.fPrincipalColumn = field.fAvailableColumns[newRepresentationIdx * N].get(); + if (field.fAuxiliaryColumn) { + R__ASSERT(N == 2); + field.fAuxiliaryColumn = field.fAvailableColumns[newRepresentationIdx * N + 1].get(); } - field.fPrincipalColumn = field.fColumns[0].get(); } }; } // namespace Internal @@ -2697,8 +2716,8 @@ protected: nbytes += CallAppendOn(*fSubFields[0], &typedValue->data()[i]); } this->fNWritten += count; - fColumns[0]->Append(&this->fNWritten); - return nbytes + fColumns[0]->GetElement()->GetPackedSize(); + fPrincipalColumn->Append(&this->fNWritten); + return nbytes + fPrincipalColumn->GetElement()->GetPackedSize(); } void ReadGlobalImpl(NTupleSize_t globalIndex, void *to) final { diff --git a/tree/ntuple/v7/src/RField.cxx b/tree/ntuple/v7/src/RField.cxx index 5e40d2500d504..c973d885b220d 100644 --- a/tree/ntuple/v7/src/RField.cxx +++ b/tree/ntuple/v7/src/RField.cxx @@ -978,9 +978,9 @@ std::vector ROOT::Experimental::RFieldBa void ROOT::Experimental::RFieldBase::CommitCluster() { - if (!fColumns.empty()) { - const auto activeRepresentationIndex = fColumns[0]->GetRepresentationIndex(); - for (auto &column : fColumns) { + if (!fAvailableColumns.empty()) { + const auto activeRepresentationIndex = fPrincipalColumn->GetRepresentationIndex(); + for (auto &column : fAvailableColumns) { if (column->GetRepresentationIndex() == activeRepresentationIndex) { column->Flush(); } else { @@ -1129,9 +1129,7 @@ void ROOT::Experimental::RFieldBase::ConnectPageSink(Internal::RPageSink &pageSi AutoAdjustColumnTypes(pageSink.GetWriteOptions()); GenerateColumns(); - if (!fColumns.empty()) - fPrincipalColumn = fColumns[0].get(); - for (auto &column : fColumns) { + for (auto &column : fAvailableColumns) { // Only the first column of every representation can be a deferred column. In all column representations, // larger column indexes are data columns of collections (string, unsplit) and thus // they have no elements on late model extension @@ -1187,9 +1185,7 @@ void ROOT::Experimental::RFieldBase::ConnectPageSource(Internal::RPageSource &pa fOnDiskTypeChecksum = *fieldDesc.GetTypeChecksum(); } } - if (!fColumns.empty()) - fPrincipalColumn = fColumns[0].get(); - for (auto& column : fColumns) + for (auto &column : fAvailableColumns) column->ConnectPageSource(fOnDiskId, pageSource); OnConnectPageSource(); @@ -1504,10 +1500,10 @@ std::size_t ROOT::Experimental::RField::AppendImpl(const void *from { auto typedValue = static_cast(from); auto length = typedValue->length(); - fColumns[1]->AppendV(typedValue->data(), length); + fAuxiliaryColumn->AppendV(typedValue->data(), length); fIndex += length; - fColumns[0]->Append(&fIndex); - return length + fColumns[0]->GetElement()->GetPackedSize(); + fPrincipalColumn->Append(&fIndex); + return length + fPrincipalColumn->GetElement()->GetPackedSize(); } void ROOT::Experimental::RField::ReadGlobalImpl(ROOT::Experimental::NTupleSize_t globalIndex, void *to) @@ -1520,7 +1516,7 @@ void ROOT::Experimental::RField::ReadGlobalImpl(ROOT::Experimental: typedValue->clear(); } else { typedValue->resize(nChars); - fColumns[1]->ReadV(collectionStart, nChars, const_cast(typedValue->data())); + fAuxiliaryColumn->ReadV(collectionStart, nChars, const_cast(typedValue->data())); } } @@ -1896,10 +1892,10 @@ std::size_t ROOT::Experimental::RUnsplitField::AppendImpl(const void *from) fClass->Streamer(const_cast(from), buffer); auto nbytes = buffer.Length(); - fColumns[1]->AppendV(buffer.Buffer(), buffer.Length()); + fAuxiliaryColumn->AppendV(buffer.Buffer(), buffer.Length()); fIndex += nbytes; - fColumns[0]->Append(&fIndex); - return nbytes + fColumns[0]->GetElement()->GetPackedSize(); + fPrincipalColumn->Append(&fIndex); + return nbytes + fPrincipalColumn->GetElement()->GetPackedSize(); } void ROOT::Experimental::RUnsplitField::ReadGlobalImpl(NTupleSize_t globalIndex, void *to) @@ -1909,7 +1905,7 @@ void ROOT::Experimental::RUnsplitField::ReadGlobalImpl(NTupleSize_t globalIndex, fPrincipalColumn->GetCollectionInfo(globalIndex, &collectionStart, &nbytes); TBufferFile buffer(TBuffer::kRead, nbytes); - fColumns[1]->ReadV(collectionStart, nbytes, buffer.Buffer()); + fAuxiliaryColumn->ReadV(collectionStart, nbytes, buffer.Buffer()); fClass->Streamer(to, buffer); } @@ -2152,8 +2148,8 @@ std::size_t ROOT::Experimental::RProxiedCollectionField::AppendImpl(const void * } fNWritten += count; - fColumns[0]->Append(&fNWritten); - return nbytes + fColumns[0]->GetElement()->GetPackedSize(); + fPrincipalColumn->Append(&fNWritten); + return nbytes + fPrincipalColumn->GetElement()->GetPackedSize(); } void ROOT::Experimental::RProxiedCollectionField::ReadGlobalImpl(NTupleSize_t globalIndex, void *to) @@ -2404,8 +2400,8 @@ std::size_t ROOT::Experimental::RVectorField::AppendImpl(const void *from) } fNWritten += count; - fColumns[0]->Append(&fNWritten); - return nbytes + fColumns[0]->GetElement()->GetPackedSize(); + fPrincipalColumn->Append(&fNWritten); + return nbytes + fPrincipalColumn->GetElement()->GetPackedSize(); } void ROOT::Experimental::RVectorField::ReadGlobalImpl(NTupleSize_t globalIndex, void *to) @@ -2543,8 +2539,8 @@ std::size_t ROOT::Experimental::RRVecField::AppendImpl(const void *from) } fNWritten += *sizePtr; - fColumns[0]->Append(&fNWritten); - return nbytes + fColumns[0]->GetElement()->GetPackedSize(); + fPrincipalColumn->Append(&fNWritten); + return nbytes + fPrincipalColumn->GetElement()->GetPackedSize(); } void ROOT::Experimental::RRVecField::ReadGlobalImpl(NTupleSize_t globalIndex, void *to) @@ -2782,8 +2778,8 @@ std::size_t ROOT::Experimental::RField>::AppendImpl(const void CallAppendOn(*fSubFields[0], &bval); } fNWritten += count; - fColumns[0]->Append(&fNWritten); - return count + fColumns[0]->GetElement()->GetPackedSize(); + fPrincipalColumn->Append(&fNWritten); + return count + fPrincipalColumn->GetElement()->GetPackedSize(); } void ROOT::Experimental::RField>::ReadGlobalImpl(NTupleSize_t globalIndex, void *to) @@ -3120,7 +3116,7 @@ std::size_t ROOT::Experimental::RBitsetField::AppendImpl(const void *from) for (std::size_t word = 0; word < (fN + kBitsPerWord - 1) / kBitsPerWord; ++word) { for (std::size_t mask = 0; (mask < kBitsPerWord) && (i < fN); ++mask, ++i) { elementValue = (asULongArray[word] & (static_cast(1) << mask)) != 0; - fColumns[0]->Append(&elementValue); + fPrincipalColumn->Append(&elementValue); } } return fN; @@ -3131,7 +3127,7 @@ void ROOT::Experimental::RBitsetField::ReadGlobalImpl(NTupleSize_t globalIndex, auto *asULongArray = static_cast(to); bool elementValue; for (std::size_t i = 0; i < fN; ++i) { - fColumns[0]->Read(globalIndex * fN + i, &elementValue); + fPrincipalColumn->Read(globalIndex * fN + i, &elementValue); Word_t mask = static_cast(1) << (i % kBitsPerWord); Word_t bit = static_cast(elementValue) << (i % kBitsPerWord); asULongArray[i / kBitsPerWord] = (asULongArray[i / kBitsPerWord] & ~mask) | bit; @@ -3226,7 +3222,7 @@ std::size_t ROOT::Experimental::RVariantField::AppendImpl(const void *from) index = fNWritten[tag - 1]++; } RColumnSwitch varSwitch(ClusterSize_t(index), tag); - fColumns[0]->Append(&varSwitch); + fPrincipalColumn->Append(&varSwitch); return nbytes + sizeof(RColumnSwitch); } @@ -3348,8 +3344,8 @@ std::size_t ROOT::Experimental::RMapField::AppendImpl(const void *from) count++; } fNWritten += count; - fColumns[0]->Append(&fNWritten); - return nbytes + fColumns[0]->GetElement()->GetPackedSize(); + fPrincipalColumn->Append(&fNWritten); + return nbytes + fPrincipalColumn->GetElement()->GetPackedSize(); } void ROOT::Experimental::RMapField::ReadGlobalImpl(NTupleSize_t globalIndex, void *to) @@ -3411,16 +3407,17 @@ void ROOT::Experimental::RNullableField::GenerateColumns() { const auto r = GetColumnRepresentatives(); const auto N = r.size(); - fColumns.reserve(N); + fAvailableColumns.reserve(N); for (std::uint16_t i = 0; i < N; ++i) { if (r[i][0] == EColumnType::kBit) { if (!fDefaultItemValue) fDefaultItemValue = std::make_unique(fSubFields[0]->CreateValue()); - fColumns.emplace_back(Internal::RColumn::Create(EColumnType::kBit, 0, i)); + fAvailableColumns.emplace_back(Internal::RColumn::Create(EColumnType::kBit, 0, i)); } else { - fColumns.emplace_back(Internal::RColumn::Create(r[i][0], 0, i)); + fAvailableColumns.emplace_back(Internal::RColumn::Create(r[i][0], 0, i)); } } + fPrincipalColumn = fAvailableColumns[0].get(); } void ROOT::Experimental::RNullableField::GenerateColumns(const RNTupleDescriptor &desc) @@ -3432,14 +3429,16 @@ void ROOT::Experimental::RNullableField::GenerateColumns(const RNTupleDescriptor break; if (onDiskTypes[0] == EColumnType::kBit) { - fColumns.emplace_back(Internal::RColumn::Create(EColumnType::kBit, 0, representationIndex)); + fAvailableColumns.emplace_back(Internal::RColumn::Create(EColumnType::kBit, 0, representationIndex)); } else { - fColumns.emplace_back(Internal::RColumn::Create(onDiskTypes[0], 0, representationIndex)); + fAvailableColumns.emplace_back( + Internal::RColumn::Create(onDiskTypes[0], 0, representationIndex)); } fColumnRepresentatives.emplace_back(onDiskTypes); representationIndex++; } while (true); + fPrincipalColumn = fAvailableColumns[0].get(); } std::size_t ROOT::Experimental::RNullableField::AppendNull() @@ -3473,7 +3472,7 @@ ROOT::Experimental::RClusterIndex ROOT::Experimental::RNullableField::GetItemInd // We ensure first that the principle column points to the non-suppressed column and // that it has the correct page mapped if (!fPrincipalColumn->ReadPageContains(globalIndex) && !fPrincipalColumn->TryMapPage(globalIndex)) { - for (const auto &c : fColumns) { + for (const auto &c : fAvailableColumns) { if (!c->TryMapPage(globalIndex)) continue; @@ -3497,8 +3496,8 @@ ROOT::Experimental::RClusterIndex ROOT::Experimental::RNullableField::GetItemInd void ROOT::Experimental::RNullableField::ReadInClusterImpl(RClusterIndex clusterIndex, void *to) { - if ((fColumns.size() > 1) && !fPrincipalColumn->TryMapPage(clusterIndex)) { - for (const auto &c : fColumns) { + if ((fAvailableColumns.size() > 1) && !fPrincipalColumn->TryMapPage(clusterIndex)) { + for (const auto &c : fAvailableColumns) { if (!c->TryMapPage(clusterIndex)) continue; @@ -3878,8 +3877,8 @@ std::size_t ROOT::Experimental::RCollectionField::AppendImpl(const void *from) std::size_t bytesWritten = fCollectionWriter->fBytesWritten; fCollectionWriter->fBytesWritten = 0; - fColumns[0]->Append(from); - return bytesWritten + fColumns[0]->GetElement()->GetPackedSize(); + fPrincipalColumn->Append(from); + return bytesWritten + fPrincipalColumn->GetElement()->GetPackedSize(); } void ROOT::Experimental::RCollectionField::ReadGlobalImpl(NTupleSize_t, void *) diff --git a/tree/ntuple/v7/test/ntuple_multi_column.cxx b/tree/ntuple/v7/test/ntuple_multi_column.cxx index 9b6f520ff09ee..2ac049aedddf1 100644 --- a/tree/ntuple/v7/test/ntuple_multi_column.cxx +++ b/tree/ntuple/v7/test/ntuple_multi_column.cxx @@ -17,13 +17,13 @@ TEST(RNTuple, MultiColumnRepresentationSimple) *ptrPx = 1.0; writer->Fill(); writer->CommitCluster(); - ROOT::Experimental::Internal::RFieldRepresentationModifier::SwapPrimayColumnRepresentation( + ROOT::Experimental::Internal::RFieldRepresentationModifier::SetPrimaryColumnRepresentation( const_cast(writer->GetModel().GetField("px")), 1); *ptrPx = 2.0; writer->Fill(); writer->CommitCluster(); - ROOT::Experimental::Internal::RFieldRepresentationModifier::SwapPrimayColumnRepresentation( - const_cast(writer->GetModel().GetField("px")), 1); + ROOT::Experimental::Internal::RFieldRepresentationModifier::SetPrimaryColumnRepresentation( + const_cast(writer->GetModel().GetField("px")), 0); *ptrPx = 3.0; writer->Fill(); } @@ -156,16 +156,16 @@ TEST(RNTuple, MultiColumnRepresentationString) *ptrStr = "abc"; writer->Fill(); writer->CommitCluster(); - ROOT::Experimental::Internal::RFieldRepresentationModifier::SwapPrimayColumnRepresentation( + ROOT::Experimental::Internal::RFieldRepresentationModifier::SetPrimaryColumnRepresentation( const_cast(writer->GetModel().GetField("str")), 1); ptrStr->clear(); writer->Fill(); writer->CommitCluster(); - ROOT::Experimental::Internal::RFieldRepresentationModifier::SwapPrimayColumnRepresentation( - const_cast(writer->GetModel().GetField("str")), 1); + ROOT::Experimental::Internal::RFieldRepresentationModifier::SetPrimaryColumnRepresentation( + const_cast(writer->GetModel().GetField("str")), 0); writer->Fill(); writer->CommitCluster(); - ROOT::Experimental::Internal::RFieldRepresentationModifier::SwapPrimayColumnRepresentation( + ROOT::Experimental::Internal::RFieldRepresentationModifier::SetPrimaryColumnRepresentation( const_cast(writer->GetModel().GetField("str")), 1); *ptrStr = "x"; writer->Fill(); @@ -198,17 +198,17 @@ TEST(RNTuple, MultiColumnRepresentationVector) auto writer = RNTupleWriter::Recreate(std::move(model), "ntpl", fileGuard.GetPath()); writer->Fill(); writer->CommitCluster(); - ROOT::Experimental::Internal::RFieldRepresentationModifier::SwapPrimayColumnRepresentation( + ROOT::Experimental::Internal::RFieldRepresentationModifier::SetPrimaryColumnRepresentation( const_cast(writer->GetModel().GetField("vec")), 1); ptrVec->push_back(1.0); writer->Fill(); writer->CommitCluster(); - ROOT::Experimental::Internal::RFieldRepresentationModifier::SwapPrimayColumnRepresentation( - const_cast(writer->GetModel().GetField("vec")), 1); + ROOT::Experimental::Internal::RFieldRepresentationModifier::SetPrimaryColumnRepresentation( + const_cast(writer->GetModel().GetField("vec")), 0); ptrVec->clear(); writer->Fill(); writer->CommitCluster(); - ROOT::Experimental::Internal::RFieldRepresentationModifier::SwapPrimayColumnRepresentation( + ROOT::Experimental::Internal::RFieldRepresentationModifier::SetPrimaryColumnRepresentation( const_cast(writer->GetModel().GetField("vec")), 1); ptrVec->push_back(2.0); ptrVec->push_back(3.0); @@ -250,19 +250,19 @@ TEST(RNTuple, MultiColumnRepresentationNullable) ptrVector->push_back(13.0); writer->Fill(); writer->CommitCluster(); - ROOT::Experimental::Internal::RFieldRepresentationModifier::SwapPrimayColumnRepresentation( + ROOT::Experimental::Internal::RFieldRepresentationModifier::SetPrimaryColumnRepresentation( const_cast(writer->GetModel().GetField("scalar")), 1); - ROOT::Experimental::Internal::RFieldRepresentationModifier::SwapPrimayColumnRepresentation( + ROOT::Experimental::Internal::RFieldRepresentationModifier::SetPrimaryColumnRepresentation( const_cast(writer->GetModel().GetField("vector._0")), 1); ptrScalar->reset(); ptrVector->clear(); ptrVector->push_back(std::optional()); writer->Fill(); writer->CommitCluster(); - ROOT::Experimental::Internal::RFieldRepresentationModifier::SwapPrimayColumnRepresentation( - const_cast(writer->GetModel().GetField("scalar")), 1); - ROOT::Experimental::Internal::RFieldRepresentationModifier::SwapPrimayColumnRepresentation( - const_cast(writer->GetModel().GetField("vector._0")), 1); + ROOT::Experimental::Internal::RFieldRepresentationModifier::SetPrimaryColumnRepresentation( + const_cast(writer->GetModel().GetField("scalar")), 0); + ROOT::Experimental::Internal::RFieldRepresentationModifier::SetPrimaryColumnRepresentation( + const_cast(writer->GetModel().GetField("vector._0")), 0); *ptrScalar = 3.0; ptrVector->clear(); ptrVector->push_back(15.0); @@ -310,7 +310,7 @@ TEST(RNTuple, MultiColumnRepresentationBulk) *ptrPx = 1.0; writer->Fill(); writer->CommitCluster(); - ROOT::Experimental::Internal::RFieldRepresentationModifier::SwapPrimayColumnRepresentation( + ROOT::Experimental::Internal::RFieldRepresentationModifier::SetPrimaryColumnRepresentation( const_cast(writer->GetModel().GetField("px")), 1); *ptrPx = 2.0; writer->Fill(); @@ -350,7 +350,7 @@ TEST(RNTuple, MultiColumnRepresentationFriends) *ptrPt = 1.0; writer->Fill(); writer->CommitCluster(); - ROOT::Experimental::Internal::RFieldRepresentationModifier::SwapPrimayColumnRepresentation( + ROOT::Experimental::Internal::RFieldRepresentationModifier::SetPrimaryColumnRepresentation( const_cast(writer->GetModel().GetField("pt")), 1); *ptrPt = 2.0; writer->Fill(); @@ -360,7 +360,7 @@ TEST(RNTuple, MultiColumnRepresentationFriends) *ptrEta = 3.0; writer->Fill(); writer->CommitCluster(); - ROOT::Experimental::Internal::RFieldRepresentationModifier::SwapPrimayColumnRepresentation( + ROOT::Experimental::Internal::RFieldRepresentationModifier::SetPrimaryColumnRepresentation( const_cast(writer->GetModel().GetField("eta")), 1); *ptrEta = 4.0; writer->Fill(); diff --git a/tree/ntupleutil/v7/test/ntuple_inspector.cxx b/tree/ntupleutil/v7/test/ntuple_inspector.cxx index cbf65097d6b52..ecc84f3456bf0 100644 --- a/tree/ntupleutil/v7/test/ntuple_inspector.cxx +++ b/tree/ntupleutil/v7/test/ntuple_inspector.cxx @@ -772,7 +772,7 @@ TEST(RNTupleInspector, MultiColumnRepresentations) auto writer = RNTupleWriter::Recreate(std::move(model), "ntpl", fileGuard.GetPath()); writer->Fill(); writer->CommitCluster(); - ROOT::Experimental::Internal::RFieldRepresentationModifier::SwapPrimayColumnRepresentation( + ROOT::Experimental::Internal::RFieldRepresentationModifier::SetPrimaryColumnRepresentation( const_cast(writer->GetModel().GetField("px")), 1); writer->Fill(); } From 0372a89bc38d4d0567be1b8e3af793e6eb005b86 Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Thu, 1 Aug 2024 00:32:53 +0200 Subject: [PATCH 32/37] [ntuple] fix switching of columns of a team --- tree/ntuple/v7/src/RColumn.cxx | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tree/ntuple/v7/src/RColumn.cxx b/tree/ntuple/v7/src/RColumn.cxx index b59219ffd19bc..b547c8b9b4f61 100644 --- a/tree/ntuple/v7/src/RColumn.cxx +++ b/tree/ntuple/v7/src/RColumn.cxx @@ -121,7 +121,8 @@ bool ROOT::Experimental::Internal::RColumn::TryMapPage(NTupleSize_t globalIndex) fReadPage = fPageSource->PopulatePage(fTeam->at(fLastGoodTeamIdx)->GetHandleSource(), globalIndex); if (fReadPage.IsValid()) break; - fLastGoodTeamIdx = (fLastGoodTeamIdx + iTeam++) % nTeam; + fLastGoodTeamIdx = (fLastGoodTeamIdx + 1) % nTeam; + iTeam++; } while (iTeam <= nTeam); return fReadPage.Contains(globalIndex); @@ -140,7 +141,8 @@ bool ROOT::Experimental::Internal::RColumn::TryMapPage(RClusterIndex clusterInde fReadPage = fPageSource->PopulatePage(fTeam->at(fLastGoodTeamIdx)->GetHandleSource(), clusterIndex); if (fReadPage.IsValid()) break; - fLastGoodTeamIdx = (fLastGoodTeamIdx + iTeam++) % nTeam; + fLastGoodTeamIdx = (fLastGoodTeamIdx + 1) % nTeam; + iTeam++; } while (iTeam <= nTeam); return fReadPage.Contains(clusterIndex); From d86f7b1c6d7622be839cf042e1aaaaf3a88ebf7b Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Thu, 1 Aug 2024 00:07:56 +0200 Subject: [PATCH 33/37] [ntuple] add test for >2 column representations --- tree/ntuple/v7/test/ntuple_multi_column.cxx | 51 +++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/tree/ntuple/v7/test/ntuple_multi_column.cxx b/tree/ntuple/v7/test/ntuple_multi_column.cxx index 2ac049aedddf1..dbc040df967ec 100644 --- a/tree/ntuple/v7/test/ntuple_multi_column.cxx +++ b/tree/ntuple/v7/test/ntuple_multi_column.cxx @@ -231,6 +231,57 @@ TEST(RNTuple, MultiColumnRepresentationVector) EXPECT_FLOAT_EQ(3.0, ptrVec->at(1)); } +TEST(RNTuple, MultiColumnRepresentationMany) +{ + using ROOT::Experimental::kUnknownCompressionSettings; + FileRaii fileGuard("test_ntuple_multi_column_representation_many.root"); + + { + auto model = RNTupleModel::Create(); + auto fldVec = RFieldBase::Create("vec", "std::vector").Unwrap(); + fldVec->SetColumnRepresentatives({{EColumnType::kIndex32}, + {EColumnType::kSplitIndex64}, + {EColumnType::kIndex64}, + {EColumnType::kSplitIndex32}}); + model->AddField(std::move(fldVec)); + auto ptrVec = model->GetDefaultEntry().GetPtr>("vec"); + auto writer = RNTupleWriter::Recreate(std::move(model), "ntpl", fileGuard.GetPath()); + ptrVec->push_back(1.0); + writer->Fill(); + writer->CommitCluster(); + ROOT::Experimental::Internal::RFieldRepresentationModifier::SetPrimaryColumnRepresentation( + const_cast(writer->GetModel().GetField("vec")), 1); + (*ptrVec)[0] = 2.0; + writer->Fill(); + writer->CommitCluster(); + ROOT::Experimental::Internal::RFieldRepresentationModifier::SetPrimaryColumnRepresentation( + const_cast(writer->GetModel().GetField("vec")), 2); + (*ptrVec)[0] = 3.0; + writer->Fill(); + writer->CommitCluster(); + ROOT::Experimental::Internal::RFieldRepresentationModifier::SetPrimaryColumnRepresentation( + const_cast(writer->GetModel().GetField("vec")), 3); + (*ptrVec)[0] = 4.0; + writer->Fill(); + } + + auto reader = RNTupleReader::Open("ntpl", fileGuard.GetPath()); + EXPECT_EQ(4u, reader->GetNEntries()); + auto ptrVec = reader->GetModel().GetDefaultEntry().GetPtr>("vec"); + reader->LoadEntry(0); + EXPECT_EQ(1u, ptrVec->size()); + EXPECT_FLOAT_EQ(1.0, ptrVec->at(0)); + reader->LoadEntry(1); + EXPECT_EQ(1u, ptrVec->size()); + EXPECT_FLOAT_EQ(2.0, ptrVec->at(0)); + reader->LoadEntry(2); + EXPECT_EQ(1u, ptrVec->size()); + EXPECT_FLOAT_EQ(3.0, ptrVec->at(0)); + reader->LoadEntry(3); + EXPECT_EQ(1u, ptrVec->size()); + EXPECT_FLOAT_EQ(4.0, ptrVec->at(0)); +} + TEST(RNTuple, MultiColumnRepresentationNullable) { FileRaii fileGuard("test_ntuple_multi_column_representation_nullable.root"); From 0d7675fffa827106ceac3bb919be9489e2aa1d4e Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Thu, 1 Aug 2024 00:53:07 +0200 Subject: [PATCH 34/37] [NFC][ntuple] add clarifying comment --- tree/ntuple/v7/inc/ROOT/RPageSourceFriends.hxx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tree/ntuple/v7/inc/ROOT/RPageSourceFriends.hxx b/tree/ntuple/v7/inc/ROOT/RPageSourceFriends.hxx index 50a48b6b8686f..780a783d9da47 100644 --- a/tree/ntuple/v7/inc/ROOT/RPageSourceFriends.hxx +++ b/tree/ntuple/v7/inc/ROOT/RPageSourceFriends.hxx @@ -76,6 +76,10 @@ private: Detail::RNTupleMetrics fMetrics; std::vector> fSources; RIdBiMap fIdBiMap; + /// TODO(jblomer): Not only the columns, but actually all the different objects of the descriptor would need + /// their own maps to avoid descriptor ID clashes. The need for the distinct column map was triggered by adding + /// support for multi-column representations. A follow-up patch should either fix the friend page source properly + /// or remove it in favor of the RNTupleProcessor. RIdBiMap fColumnMap; RNTupleDescriptorBuilder fBuilder; From e1ce683c2ec3bf745f2702caff5d0aab24e7e397 Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Thu, 1 Aug 2024 01:02:28 +0200 Subject: [PATCH 35/37] [ntuple] minor improvement --- tree/ntuple/v7/src/RField.cxx | 1 + 1 file changed, 1 insertion(+) diff --git a/tree/ntuple/v7/src/RField.cxx b/tree/ntuple/v7/src/RField.cxx index c973d885b220d..fd6257234ffc5 100644 --- a/tree/ntuple/v7/src/RField.cxx +++ b/tree/ntuple/v7/src/RField.cxx @@ -1027,6 +1027,7 @@ void ROOT::Experimental::RFieldBase::SetColumnRepresentatives( throw RException(R__FAIL("cannot set column representative once field is connected")); const auto &validTypes = GetColumnRepresentations().GetSerializationTypes(); fColumnRepresentatives.clear(); + fColumnRepresentatives.reserve(representatives.size()); for (const auto &r : representatives) { auto itRepresentative = std::find(validTypes.begin(), validTypes.end(), r); if (itRepresentative == std::end(validTypes)) From f11ca91f2bcf84d74144f4c4ae2f1c0dcd7b46a8 Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Thu, 1 Aug 2024 01:17:47 +0200 Subject: [PATCH 36/37] [ntuple] avoid shared pointer for RColumn::fTeam --- tree/ntuple/v7/inc/ROOT/RColumn.hxx | 8 +++----- tree/ntuple/v7/src/RColumn.cxx | 27 +++++++++++++++++---------- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/tree/ntuple/v7/inc/ROOT/RColumn.hxx b/tree/ntuple/v7/inc/ROOT/RColumn.hxx index dc5bb471b0ca7..ee9e875f8a1c7 100644 --- a/tree/ntuple/v7/inc/ROOT/RColumn.hxx +++ b/tree/ntuple/v7/inc/ROOT/RColumn.hxx @@ -75,11 +75,11 @@ private: NTupleSize_t fFirstElementIndex = 0; /// Used to pack and unpack pages on writing/reading std::unique_ptr fElement; - /// The column team is a set of columns that serve the same column index for different representation IDs - /// Initially, the team has only one member, the very column it belongs to. Through MergeTeams(), two column + /// The column team is a set of columns that serve the same column index for different representation IDs. + /// Initially, the team has only one member, the very column it belongs to. Through MergeTeams(), two columns /// can join forces. The team is used to react on suppressed columns: if the current team member has a suppressed /// column for a MapPage() call, it get the page from the active column in the corresponding cluster. - std::shared_ptr> fTeam; + std::vector fTeam; /// Points into fTeam to the column that successfully returned the last page. std::size_t fLastGoodTeamIdx = 0; @@ -117,8 +117,6 @@ private: fWritePage[otherIdx].Reset(0); } - void TeamUp(const RColumn &other) { fTeam = other.fTeam; } - public: template static std::unique_ptr Create(EColumnType type, std::uint32_t columnIdx, std::uint16_t representationIdx) diff --git a/tree/ntuple/v7/src/RColumn.cxx b/tree/ntuple/v7/src/RColumn.cxx index b547c8b9b4f61..ffbd736d11bd6 100644 --- a/tree/ntuple/v7/src/RColumn.cxx +++ b/tree/ntuple/v7/src/RColumn.cxx @@ -19,15 +19,13 @@ #include +#include #include #include ROOT::Experimental::Internal::RColumn::RColumn(EColumnType type, std::uint32_t columnIndex, std::uint16_t representationIndex) - : fType(type), - fIndex(columnIndex), - fRepresentationIndex(representationIndex), - fTeam(std::make_shared>(std::vector{this})) + : fType(type), fIndex(columnIndex), fRepresentationIndex(representationIndex), fTeam({this}) { // TODO(jblomer): fix for column types with configurable bit length once available const auto [minBits, maxBits] = RColumnElementBase::GetValidBitRange(type); @@ -115,10 +113,10 @@ bool ROOT::Experimental::Internal::RColumn::TryMapPage(NTupleSize_t globalIndex) // the page population fails. fReadPage = RPage(); - const auto nTeam = fTeam->size(); + const auto nTeam = fTeam.size(); std::size_t iTeam = 1; do { - fReadPage = fPageSource->PopulatePage(fTeam->at(fLastGoodTeamIdx)->GetHandleSource(), globalIndex); + fReadPage = fPageSource->PopulatePage(fTeam.at(fLastGoodTeamIdx)->GetHandleSource(), globalIndex); if (fReadPage.IsValid()) break; fLastGoodTeamIdx = (fLastGoodTeamIdx + 1) % nTeam; @@ -135,10 +133,10 @@ bool ROOT::Experimental::Internal::RColumn::TryMapPage(RClusterIndex clusterInde // the page population fails. fReadPage = RPage(); - const auto nTeam = fTeam->size(); + const auto nTeam = fTeam.size(); std::size_t iTeam = 1; do { - fReadPage = fPageSource->PopulatePage(fTeam->at(fLastGoodTeamIdx)->GetHandleSource(), clusterIndex); + fReadPage = fPageSource->PopulatePage(fTeam.at(fLastGoodTeamIdx)->GetHandleSource(), clusterIndex); if (fReadPage.IsValid()) break; fLastGoodTeamIdx = (fLastGoodTeamIdx + 1) % nTeam; @@ -150,6 +148,15 @@ bool ROOT::Experimental::Internal::RColumn::TryMapPage(RClusterIndex clusterInde void ROOT::Experimental::Internal::RColumn::MergeTeams(RColumn &other) { - fTeam->emplace_back(&other); - other.TeamUp(*this); + // We are working on very small vectors here, so quadratic complexity works + for (auto *c : other.fTeam) { + if (std::find(fTeam.begin(), fTeam.end(), c) == fTeam.end()) + fTeam.emplace_back(c); + } + + for (auto c : fTeam) { + if (c == this) + continue; + c->fTeam = fTeam; + } } From 9f6c0b34311bbba58a44b7e502f5c5d37bc21565 Mon Sep 17 00:00:00 2001 From: Jonas Hahnfeld Date: Thu, 1 Aug 2024 10:39:22 +0200 Subject: [PATCH 37/37] [cling] Forcefully clean up TemplateIdAnnotations Upstream Clang keeps TemplateIdAnnotations around "if they might still be in the token stream." See upstream commit for more details: https://github.com/llvm/llvm-project/commit/6163aa96799cbad7f2f58e02c5bebee9647056a5 (included in Clang 11, in ROOT since the upgrade to LLVM 13) This reasoning doesn't apply when we fully reset the Parser state in ParserStateRAII's destructor, and we expect the swapped out vector of TemplateIdAnnotations to be empty in order to not leak. Fixes #16121 --- interpreter/cling/lib/Utils/ParserStateRAII.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/interpreter/cling/lib/Utils/ParserStateRAII.cpp b/interpreter/cling/lib/Utils/ParserStateRAII.cpp index d53c3b8c00311..d0eae481986d8 100644 --- a/interpreter/cling/lib/Utils/ParserStateRAII.cpp +++ b/interpreter/cling/lib/Utils/ParserStateRAII.cpp @@ -50,10 +50,13 @@ cling::ParserStateRAII::~ParserStateRAII() { // Note: Consuming the EOF token will pop the include stack. // { - // Cleanup the TemplateIds before swapping the previous set back. - Parser::DestroyTemplateIdAnnotationsRAIIObj CleanupTemplateIds(*P); + // Forcefully clean up the TemplateIds, ignoring additional checks in + // MaybeDestroyTemplateIds called from DestroyTemplateIdAnnotationsRAIIObj, + // before swapping the previous set back. + P->DestroyTemplateIds(); } P->TemplateIds.swap(OldTemplateIds); + assert(OldTemplateIds.empty()); if (SkipToEOF) P->SkipUntil(tok::eof); else