WIP: trying to load an RNTuple into a jagged-array data structure

alpaka-group · Jul 8, 2021 · ad0dc10 · ad0dc10
1 parent 192c835
commit ad0dc10
Show file tree

Hide file tree

Showing 2 changed files with 76 additions and 11 deletions.
diff --git a/examples/common/ttjet_13tev_june2019.hpp b/examples/common/ttjet_13tev_june2019.hpp
@@ -7,7 +7,7 @@
 
 using bit = bool;
 using byte = unsigned char;
-using Index = std::uint64_t;
+using Index = std::uint32_t;
 
 // clang-format off
 struct run {};
@@ -1538,7 +1538,7 @@ using Electron = llama::Record<
     llama::Field<Electron_pdgId, std::int32_t>,
     llama::Field<Electron_photonIdx, std::int32_t>,
     llama::Field<Electron_tightCharge, std::int32_t>,
-    llama::Field<Electron_vidNestedWPbitmap, std::int32_t>,
+    //llama::Field<Electron_vidNestedWPbitmap, std::int32_t>,
     llama::Field<Electron_convVeto, bit>,
     llama::Field<Electron_cutBased_HEEP, bit>,
     llama::Field<Electron_isPFcand, bit>,
@@ -1947,6 +1947,7 @@ using Event = llama::Record<
     llama::Field<ChsMET_sumEt, float>,
     //llama::Field<nCorrT1METJet, Index>,
     //llama::Field<nElectron, Index>,
+    llama::Field<nElectron, Electron[]>,
     llama::Field<Flag_ecalBadCalibFilterV2, bit>,
     //llama::Field<nFatJet, Index>,
     //llama::Field<nGenJetAK8, Index>,

diff --git a/examples/hep_rntuple/hep_rntuple.cpp b/examples/hep_rntuple/hep_rntuple.cpp
@@ -1,5 +1,5 @@
 // This example uses a non-public CMS NanoAOD file called: ttjet_13tev_june2019_lzma.
-// Please ask contact us if you need it.
+// Please contact us if you need it.
 
 #include "../common/ttjet_13tev_june2019.hpp"
 
@@ -14,6 +14,8 @@
 #include <llama/DumpMapping.hpp>
 #include <llama/llama.hpp>
 
+using SmallEvent = boost::mp11::mp_take_c<Event, 100>;
+
 int main(int argc, const char* argv[])
 {
     if (argc != 2)
@@ -25,27 +27,89 @@ int main(int argc, const char* argv[])
     using namespace std::chrono;
     using namespace ROOT::Experimental;
 
+    // auto ntuple
+    //    = RNTupleReader::Open(RNTupleModel::Create(), "NTuple", "/mnt/c/dev/llama/ttjet_13tev_june2019_lzma.root");
     auto ntuple = RNTupleReader::Open(RNTupleModel::Create(), "NTuple", argv[1]);
-    const auto n = ntuple->GetNEntries();
+    // try
+    //{
+    //    ntuple->PrintInfo(ROOT::Experimental::ENTupleInfo::kStorageDetails);
+    //}
+    // catch (const std::exception& e)
+    //{
+    //    fmt::print("PrintInfo error: {}", e.what());
+    //}
+    const auto eventCount = ntuple->GetNEntries();
+    const auto& d = ntuple->GetDescriptor();
+    const auto electronCount
+        = d.GetNElements(d.FindColumnId(d.FindFieldId("nElectron.nElectron.Electron_deltaEtaSC"), 0));
+    fmt::print("File contains {} events with {} electrons\n", eventCount, electronCount);
 
     auto start = steady_clock::now();
-    auto view = llama::allocView(llama::mapping::SoA<llama::ArrayDims<1>, Event, true>{llama::ArrayDims{n}});
+    auto mapping = llama::mapping::OffsetTable<llama::ArrayDims<1>, SmallEvent>{
+        llama::ArrayDims{eventCount},
+        llama::ArrayDims{electronCount}};
+    auto view = llama::allocView(mapping);
     fmt::print("Alloc LLAMA view: {}ms\n", duration_cast<milliseconds>(steady_clock::now() - start).count());
 
     std::size_t totalSize = 0;
     for (auto i = 0u; i < view.mapping.blobCount; i++)
         totalSize += view.mapping.blobSize(i);
     fmt::print("Total LLAMA view memory: {}MiB in {} blobs\n", totalSize / 1024 / 1024, view.mapping.blobCount);
 
+    // fill offset table
     start = steady_clock::now();
-    llama::forEachLeaf<Event>(
+    std::size_t offset = 0;
+    auto electronViewCollection = ntuple->GetViewCollection("nElectron");
+    for (std::size_t i = 0; i < eventCount; i++)
+    {
+        offset += electronViewCollection(i);
+        view(i)(llama::EndOffset<nElectron>{}) = offset;
+        assert(offset <= electronCount);
+    }
+    fmt::print("Fill offset table: {}ms\n", duration_cast<milliseconds>(steady_clock::now() - start).count());
+
+    using AugmentedSmallEvent = typename decltype(mapping)::RecordDim;
+    start = steady_clock::now();
+    llama::forEachLeaf<AugmentedSmallEvent>(
         [&](auto coord)
         {
-            using Name = llama::GetTag<Event, decltype(coord)>;
-            using Type = llama::GetType<Event, decltype(coord)>;
-            auto column = ntuple->GetView<Type>(llama::structName<Name>());
-            for (std::size_t i = 0; i < n; i++)
-                view(i)(coord) = column(i);
+            using Coord = decltype(coord);
+            using LeafTag = llama::GetTag<AugmentedSmallEvent, Coord>;
+            using Type = llama::GetType<AugmentedSmallEvent, Coord>;
+
+            fmt::print("Copying {}\n", llama::structName<LeafTag>());
+            if constexpr (
+                !llama::mapping::internal::isEndOffsetField<LeafTag> && !llama::mapping::internal::isSizeField<LeafTag>)
+            {
+                if constexpr (boost::mp11::mp_contains<typename Coord::List, boost::mp11::mp_size_t<llama::dynamic>>::
+                                  value)
+                {
+                    using Before = llama::mapping::internal::BeforeDynamic<Coord>;
+                    using BeforeBefore = llama::RecordCoordFromList<boost::mp11::mp_pop_front<typename Before::List>>;
+                    using After = llama::mapping::internal::AfterDynamic<Coord>;
+                    using SubCollectionTag = llama::GetTag<AugmentedSmallEvent, Before>;
+
+                    auto collectionColumn = ntuple->GetViewCollection(llama::structName<SubCollectionTag>());
+                    auto column = collectionColumn.template GetView<Type>(
+                        llama::structName<SubCollectionTag>() + "." + llama::structName<LeafTag>());
+                    for (std::size_t i = 0; i < eventCount; i++)
+                    {
+                        const auto subCollectionCount = view(i)(BeforeBefore{})(llama::Size<SubCollectionTag>{});
+                        for (std::size_t j = 0; j < subCollectionCount; j++)
+                        {
+                            const auto value = column(j);
+                            auto& dst = view(i)(Before{})(j) (After{});
+                            dst = value;
+                        }
+                    }
+                }
+                else
+                {
+                    auto column = ntuple->GetView<Type>(llama::structName<LeafTag>());
+                    for (std::size_t i = 0; i < eventCount; i++)
+                        view(i)(coord) = column(i);
+                }
+            }
         });
     fmt::print("Copy RNTuple -> LLAMA view: {}ms\n", duration_cast<milliseconds>(steady_clock::now() - start).count());