Merge pull request #1673 from kosack/refactor/dl1writer_to_datawriter

DL2 data model and output
cta-observatory · May 19, 2021 · 5a874ec · 5a874ec
2 parents 61c0018 + 08f56ed
commit 5a874ec
Show file tree

Hide file tree

Showing 22 changed files with 436 additions and 263 deletions.
diff --git a/ctapipe/containers.py b/ctapipe/containers.py
@@ -18,6 +18,7 @@
     "DL1CameraCalibrationContainer",
     "DL1CameraContainer",
     "DL1Container",
+    "DL2Container",
     "EventCalibrationContainer",
     "EventCameraCalibrationContainer",
     "EventIndexContainer",
@@ -38,7 +39,7 @@
     "R1Container",
     "ReconstructedContainer",
     "ReconstructedEnergyContainer",
-    "ReconstructedShowerContainer",
+    "ReconstructedGeometryContainer",
     "SimulatedCameraContainer",
     "SimulatedShowerContainer",
     "SimulatedShowerDistribution",
@@ -490,17 +491,19 @@ class TriggerContainer(Container):
     container_prefix = ""
     time = Field(NAN_TIME, "central average time stamp")
     tels_with_trigger = Field(
-        [], "List of telescope ids that triggered the array event"
+        None, "List of telescope ids that triggered the array event"
     )
     event_type = Field(EventType.SUBARRAY, "Event type")
     tel = Field(Map(TelescopeTriggerContainer), "telescope-wise trigger information")
 
 
-class ReconstructedShowerContainer(Container):
+class ReconstructedGeometryContainer(Container):
     """
     Standard output of algorithms reconstructing shower geometry
     """
 
+    container_prefix = ""
+
     alt = Field(nan * u.deg, "reconstructed altitude", unit=u.deg)
     alt_uncert = Field(nan * u.deg, "reconstructed altitude uncertainty", unit=u.deg)
     az = Field(nan * u.deg, "reconstructed azimuth", unit=u.deg)
@@ -523,20 +526,20 @@ class ReconstructedShowerContainer(Container):
             "was properly reconstructed by the algorithm"
         ),
     )
-    tel_ids = Field(
-        [], ("list of the telescope ids used in the" " reconstruction of the shower")
-    )
     average_intensity = Field(
         nan, "average intensity of the intensities used for reconstruction"
     )
     goodness_of_fit = Field(nan, "measure of algorithm success (if fit)")
+    tel_ids = Field(None, "list of tel_ids used if stereo, or None if Mono")
 
 
 class ReconstructedEnergyContainer(Container):
     """
     Standard output of algorithms estimating energy
     """
 
+    container_prefix = ""
+
     energy = Field(nan * u.TeV, "reconstructed energy", unit=u.TeV)
     energy_uncert = Field(nan * u.TeV, "reconstructed energy uncertainty", unit=u.TeV)
     is_valid = Field(
@@ -547,67 +550,70 @@ class ReconstructedEnergyContainer(Container):
             "algorithm"
         ),
     )
-    tel_ids = Field(
-        [],
-        (
-            "array containing the telescope ids used in the"
-            " reconstruction of the shower"
-        ),
-    )
-    goodness_of_fit = Field(0.0, "goodness of the algorithm fit")
+    goodness_of_fit = Field(nan, "goodness of the algorithm fit")
+    tel_ids = Field(None, "list of tel_ids used if stereo, or None if Mono")
 
 
 class ParticleClassificationContainer(Container):
     """
     Standard output of gamma/hadron classification algorithms
     """
 
+    container_prefix = ""
+
     # TODO: Do people agree on this? This is very MAGIC-like.
     # TODO: Perhaps an integer classification to support different classes?
     # TODO: include an error on the prediction?
     prediction = Field(
-        0.0,
+        nan,
         (
             "prediction of the classifier, defined between "
             "[0,1], where values close to 0 are more "
             "gamma-like, and values close to 1 more "
             "hadron-like"
         ),
     )
-    is_valid = Field(
-        False,
-        (
-            "classificator validity flag. True if the "
-            "predition was successful within the algorithm "
-            "validity range"
-        ),
-    )
-
-    # TODO: KPK: is this different than the list in the reco
-    # container? Why repeat?
-    tel_ids = Field(
-        [],
-        (
-            "array containing the telescope ids used "
-            "in the reconstruction of the shower"
-        ),
-    )
-    goodness_of_fit = Field(0.0, "goodness of the algorithm fit")
+    is_valid = Field(False, "true if classification parameters are valid")
+    goodness_of_fit = Field(nan, "goodness of the algorithm fit")
+    tel_ids = Field(None, "list of tel_ids used if stereo, or None if Mono")
 
 
 class ReconstructedContainer(Container):
-    """ collect reconstructed shower info from multiple algorithms """
+    """ Reconstructed shower info from multiple algorithms """
+
+    # Note: there is a reason why the hierarchy is
+    # `event.dl2.stereo.geometry[algorithm]` and not
+    # `event.dl2[algorithm].stereo.geometry` and that is because when writing
+    # the data, the former makes it easier to only write information that a
+    # particular reconstructor generates, e.g. only write the geometry in cases
+    # where energy is not yet computed. Some algorithms will compute all three,
+    # but most will compute only one or two of these sub-Containers:
 
-    shower = Field(
-        Map(ReconstructedShowerContainer), "Map of algorithm name to shower info"
+    geometry = Field(
+        Map(ReconstructedGeometryContainer),
+        "map of algorithm to reconstructed shower parameters",
     )
     energy = Field(
-        Map(ReconstructedEnergyContainer), "Map of algorithm name to energy info"
+        Map(ReconstructedEnergyContainer),
+        "map of algorithm to reconstructed energy parameters",
     )
     classification = Field(
         Map(ParticleClassificationContainer),
-        "Map of algorithm name to classification info",
+        "map of algorithm to classification parameters",
+    )
+
+
+class DL2Container(Container):
+    """Reconstructed Shower information for a given reconstruction algorithm,
+    including optionally both per-telescope mono reconstruction and per-shower
+    stereo reconstructions
+    """
+
+    tel = Field(
+        Map(ReconstructedContainer),
+        "map of tel_id to single-telescope reconstruction (DL2a)",
     )
+    stereo = Field(ReconstructedContainer(), "Stereo Shower reconstruction results")
 
 
 class TelescopePointingContainer(Container):
@@ -885,7 +891,7 @@ class ArrayEventContainer(Container):
     r1 = Field(R1Container(), "R1 Calibrated Data")
     dl0 = Field(DL0Container(), "DL0 Data Volume Reduced Data")
     dl1 = Field(DL1Container(), "DL1 Calibrated image")
-    dl2 = Field(ReconstructedContainer(), "Reconstructed Shower Information")
+    dl2 = Field(DL2Container(), "DL2 reconstruction info")
     simulation = Field(
         None, "Simulated Event Information", type=SimulatedEventContainer
     )

diff --git a/ctapipe/io/__init__.py b/ctapipe/io/__init__.py
@@ -4,7 +4,7 @@
 from .tableio import TableWriter, TableReader
 from .datalevels import DataLevel
 from .astropy_helpers import read_table
-from .dl1writer import DL1Writer
+from .datawriter import DataWriter
 
 from ..core.plugins import detect_and_import_io_plugins
 
@@ -27,5 +27,5 @@
     "DL1EventSource",
     "DataLevel",
     "read_table",
-    "DL1Writer",
+    "DataWriter",
 ]
diff --git a/ctapipe/io/datalevels.py b/ctapipe/io/datalevels.py
@@ -4,11 +4,17 @@
 class DataLevel(Enum):
     """Enum of the different Data Levels"""
 
-    R0 = auto()
-    R1 = auto()
-    R2 = auto()
-    DL0 = auto()
-    DL1_IMAGES = auto()
-    DL1_PARAMETERS = auto()
-    DL2 = auto()
-    DL3 = auto()
+    R0 = auto()  # Raw data in camera or simulation format
+    R1 = auto()  # Raw data in common format, with preliminary calibration
+    DL0 = auto()  # raw archived data in common format, with optional zero suppression
+    DL1 = auto()  # processed data
+    DL1_IMAGES = auto()  # processed data up to camera images
+    DL1_PARAMETERS = auto()  #  parameters derived from camera images
+    DL2 = auto()  # reconstructed data
+    DL3 = auto()  # reduced reconstructed data
+
+    # the rest are not generated by ctapipe, but are listed here in case this
+    # code is used elsewhere:
+    DL4 = auto()  # binned  datasets
+    DL5 = auto()  # science datasets (fluxes)
+    DL6 = auto()  # derived science data (catalogs, etc.)