From 164d36ea9e204d3de84af583846c469f59193213 Mon Sep 17 00:00:00 2001 From: Karl Kosack Date: Tue, 6 Apr 2021 21:23:38 +0200 Subject: [PATCH 01/33] clean up DL2 containers, adding mono reco --- ctapipe/containers.py | 83 +++++++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 39 deletions(-) diff --git a/ctapipe/containers.py b/ctapipe/containers.py index 2e373c1423d..f2a1047552e 100644 --- a/ctapipe/containers.py +++ b/ctapipe/containers.py @@ -487,17 +487,19 @@ class TriggerContainer(Container): container_prefix = "" time = Field(NAN_TIME, "central average time stamp") tels_with_trigger = Field( - [], "List of telescope ids that triggered the array event" + None, "List of telescope ids that triggered the array event" ) event_type = Field(EventType.SUBARRAY, "Event type") tel = Field(Map(TelescopeTriggerContainer), "telescope-wise trigger information") -class ReconstructedShowerContainer(Container): +class ReconstructedGeometryContainer(Container): """ Standard output of algorithms reconstructing shower geometry """ + container_prefix = "" + alt = Field(nan * u.deg, "reconstructed altitude", unit=u.deg) alt_uncert = Field(nan * u.deg, "reconstructed altitude uncertainty", unit=u.deg) az = Field(nan * u.deg, "reconstructed azimuth", unit=u.deg) @@ -520,13 +522,11 @@ class ReconstructedShowerContainer(Container): "was properly reconstructed by the algorithm" ), ) - tel_ids = Field( - [], ("list of the telescope ids used in the" " reconstruction of the shower") - ) average_intensity = Field( nan, "average intensity of the intensities used for reconstruction" ) goodness_of_fit = Field(nan, "measure of algorithm success (if fit)") + tel_id_list = Field(None, "list of tel_ids used if stereo, or None if Mono") class ReconstructedEnergyContainer(Container): @@ -534,6 +534,8 @@ class ReconstructedEnergyContainer(Container): Standard output of algorithms estimating energy """ + container_prefix = "" + energy = Field(nan * u.TeV, "reconstructed energy", unit=u.TeV) energy_uncert = Field(nan * u.TeV, "reconstructed energy uncertainty", unit=u.TeV) is_valid = Field( @@ -544,14 +546,8 @@ class ReconstructedEnergyContainer(Container): "algorithm" ), ) - tel_ids = Field( - [], - ( - "array containing the telescope ids used in the" - " reconstruction of the shower" - ), - ) - goodness_of_fit = Field(0.0, "goodness of the algorithm fit") + goodness_of_fit = Field(nan, "goodness of the algorithm fit") + tel_id_list = Field(None, "list of tel_ids used if stereo, or None if Mono") class ParticleClassificationContainer(Container): @@ -559,11 +555,13 @@ class ParticleClassificationContainer(Container): Standard output of gamma/hadron classification algorithms """ + container_prefix = "" + # TODO: Do people agree on this? This is very MAGIC-like. # TODO: Perhaps an integer classification to support different classes? # TODO: include an error on the prediction? prediction = Field( - 0.0, + nan, ( "prediction of the classifier, defined between " "[0,1], where values close to 0 are more " @@ -571,40 +569,47 @@ class ParticleClassificationContainer(Container): "hadron-like" ), ) - is_valid = Field( - False, - ( - "classificator validity flag. True if the " - "predition was successful within the algorithm " - "validity range" - ), - ) - - # TODO: KPK: is this different than the list in the reco - # container? Why repeat? 
-    tel_ids = Field(
-        [],
-        (
-            "array containing the telescope ids used "
-            "in the reconstruction of the shower"
-        ),
-    )
-    goodness_of_fit = Field(0.0, "goodness of the algorithm fit")
+    is_valid = Field(False, "true if classification parameters are valid")
+    goodness_of_fit = Field(nan, "goodness of the algorithm fit")
+    tel_id_list = Field(None, "list of tel_ids used if stereo, or None if Mono")
 
 
 class ReconstructedContainer(Container):
-    """ collect reconstructed shower info from multiple algorithms """
+    """ Reconstructed shower info from multiple algorithms """
+
+    # Note: there is a reason why the hierarchy is
+    # `event.dl2.stereo.geometry[algorithm]` and not
+    # `event.dl2[algorithm].stereo.geometry` and that is because when writing
+    # the data, the former makes it easier to only write information that a
+    # particular reconstructor generates, e.g. only write the geometry in cases
+    # where energy is not yet computed. Some algorithms will compute all three,
+    # but most will compute only one or two of these sub-Containers:
 
-    shower = Field(
-        Map(ReconstructedShowerContainer), "Map of algorithm name to shower info"
+    geometry = Field(
+        Map(ReconstructedGeometryContainer),
+        "map of algorithm to reconstructed shower parameters",
     )
     energy = Field(
-        Map(ReconstructedEnergyContainer), "Map of algorithm name to energy info"
+        Map(ReconstructedEnergyContainer),
+        "map of algorithm to reconstructed energy parameters",
     )
     classification = Field(
         Map(ParticleClassificationContainer),
-        "Map of algorithm name to classification info",
+        "map of algorithm to classification parameters",
+    )
+
+
+class DL2Container(Container):
+    """
+    Reconstructed Shower information for a given reconstruction algorithm,
+    including optionally both per-telescope mono reconstruction and per-shower stereo reconstructions
+    """
+
+    tel = Field(
+        Map(ReconstructedContainer),
+        "map of tel_id to single-telescope reconstruction (DL2a)",
     )
+    stereo = Field(ReconstructedContainer(), "Stereo Shower reconstruction results")
 
 
 class TelescopePointingContainer(Container):
@@ -882,7 +887,7 @@ class ArrayEventContainer(Container):
     r1 = Field(R1Container(), "R1 Calibrated Data")
     dl0 = Field(DL0Container(), "DL0 Data Volume Reduced Data")
     dl1 = Field(DL1Container(), "DL1 Calibrated image")
-    dl2 = Field(ReconstructedContainer(), "Reconstructed Shower Information")
+    dl2 = Field(DL2Container(), "DL2 reconstruction info")
     simulation = Field(
         None, "Simulated Event Information", type=SimulatedEventContainer
     )

From 3bcf0a6d6207750cd2bddaae496c0ed61df5dae9 Mon Sep 17 00:00:00 2001
From: Karl Kosack
Date: Tue, 6 Apr 2021 21:24:31 +0200
Subject: [PATCH 02/33] refactor DL1Writer to DataWriter

---
 ctapipe/io/__init__.py | 2 +-
 ctapipe/io/{dl1writer.py => datawriter.py} | 103 ++++++++++++++----
 ctapipe/io/dl1eventsource.py | 4 +-
 ctapipe/io/metadata.py | 15 ++-
 .../{test_dl1writer.py => test_datawriter.py} | 53 +++++++--
 ctapipe/reco/HillasReconstructor.py | 12 +-
 ctapipe/reco/ImPACT.py | 4 +-
 ctapipe/reco/hillas_intersection.py | 8 +-
 ctapipe/reco/reco_algorithms.py | 4 +-
 ctapipe/reco/tests/test_ImPACT.py | 4 +-
 ctapipe/tools/stage1.py | 22 ++--
 examples/stage1_config.json | 2 +-
 12 files changed, 167 insertions(+), 66 deletions(-)
 rename ctapipe/io/{dl1writer.py => datawriter.py} (86%)
 rename ctapipe/io/tests/{test_dl1writer.py => test_datawriter.py} (71%)

diff --git a/ctapipe/io/__init__.py b/ctapipe/io/__init__.py
index ada2577de95..1e3dc01c9cc 100644
--- a/ctapipe/io/__init__.py
+++ b/ctapipe/io/__init__.py
@@ -5,7 +5,7 @@ from
.tableio import TableWriter, TableReader from .datalevels import DataLevel from .astropy_helpers import read_table -from .dl1writer import DL1Writer +from .datawriter import DataWriter from ..core.plugins import detect_and_import_io_plugins diff --git a/ctapipe/io/dl1writer.py b/ctapipe/io/datawriter.py similarity index 86% rename from ctapipe/io/dl1writer.py rename to ctapipe/io/datawriter.py index ba9fe86ac0b..911d498a868 100644 --- a/ctapipe/io/dl1writer.py +++ b/ctapipe/io/datawriter.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -Class to write DL1 data from an event stream +Class to write DL1 (a,b) and DL2 (a) data from an event stream """ @@ -29,8 +29,9 @@ tables.parameters.NODE_CACHE_SLOTS = 3000 # fixes problem with too many datasets -DL1_DATA_MODEL_VERSION = "v1.1.0" -DL1_DATA_MODEL_CHANGE_HISTORY = """ +DATA_MODEL_VERSION = "v1.2.0" +DATA_MODEL_CHANGE_HISTORY = """ +- v1.2.0: change to more general data model, including also DL2 (DL1 unchanged) - v1.1.0: images and peak_times can be stored as scaled integers - v1.0.3: true_image dtype changed from float32 to int32 """ @@ -60,11 +61,11 @@ def write_reference_metadata_headers(obs_ids, subarray, writer, is_simulation): contact=meta.Contact(name="", email="", organization="CTA Consortium"), product=meta.Product( description="DL1 Data Product", - data_category="S", - data_level="DL1", + data_category="S", # TODO: make this automatically filled + data_level="DL1,DL2", # TODO: make this automatically filled data_association="Subarray", - data_model_name="ASWG DL1", - data_model_version=DL1_DATA_MODEL_VERSION, + data_model_name="ASWG", + data_model_version=DATA_MODEL_VERSION, data_model_url="", format="hdf5", ), @@ -88,7 +89,7 @@ def write_reference_metadata_headers(obs_ids, subarray, writer, is_simulation): meta.write_to_hdf5(headers, writer._h5file) -class DL1Writer(Component): +class DataWriter(Component): """ Serialize a sequence of events into a HDF5 DL1 file, in the correct format @@ -106,15 +107,23 @@ class DL1Writer(Component): """ output_path = Path( - help="DL1 output filename", default_value=pathlib.Path("events.dl1.h5") + help="output filename", default_value=pathlib.Path("events.dl1.h5") ).tag(config=True) - write_images = Bool( - help="Store DL1/Event/Image data in output", default_value=False - ).tag(config=True) + write_images = Bool(help="Store DL1 Images if available", default_value=False).tag( + config=True + ) write_parameters = Bool( - help="Compute and store image parameters", default_value=True + help="Store DL1 image parameters if available", default_value=True + ).tag(config=True) + + write_stereo_shower = Bool( + help="Store DL2 stereo shower geometry if available", default_value=False + ).tag(config=True) + + write_mono_shower = Bool( + help="Store DL2 stereo mono geometry if available", default_value=False ).tag(config=True) compression_level = Int( @@ -124,7 +133,7 @@ class DL1Writer(Component): split_datasets_by = CaselessStrEnum( values=["tel_id", "tel_type"], default_value="tel_id", - help="Splitting level for the parameters and images datasets", + help="Splitting level for the DL1 parameters and images datasets", ).tag(config=True) compression_type = CaselessStrEnum( @@ -217,7 +226,14 @@ def __call__(self, event: ArrayEventContainer): ) # write telescope event data - self._write_telescope_events(self._writer, event) + self._write_dl1_telescope_events(self._writer, event) + + # write DL2 info if requested + if self.write_mono_shower: + self._write_dl2_telescope_events(self._writer, event) + + if 
self.write_stereo_shower: + self._write_dl2_stereo_event(self._writer, event) def setup(self): """called on first event""" @@ -476,7 +492,12 @@ def fill_from_simtel( containers=hist_container, ) - def _write_telescope_events(self, writer: TableWriter, event: ArrayEventContainer): + def table_name(self, tel_id, tel_type): + return f"tel_{tel_id:03d}" if self.split_datasets_by == "tel_id" else tel_type + + def _write_dl1_telescope_events( + self, writer: TableWriter, event: ArrayEventContainer + ): """ add entries to the event/telescope tables for each telescope in a single event @@ -487,7 +508,6 @@ def _write_telescope_events(self, writer: TableWriter, event: ArrayEventContaine for tel_id, dl1_camera in event.dl1.tel.items(): dl1_camera.prefix = "" # don't want a prefix for this container telescope = self._subarray.tel[tel_id] - tel_type = str(telescope) self.log.debug("WRITING TELESCOPE %s: %s", tel_id, telescope) tel_index = TelEventIndexContainer( @@ -506,9 +526,7 @@ def _write_telescope_events(self, writer: TableWriter, event: ArrayEventContaine ) self._last_pointing_tel[tel_id] = current_pointing - table_name = ( - f"tel_{tel_id:03d}" if self.split_datasets_by == "tel_id" else tel_type - ) + table_name = self.table_name(tel_id, str(telescope)) writer.write( "dl1/event/telescope/trigger", [tel_index, event.trigger.tel[tel_id]] @@ -550,6 +568,51 @@ def _write_telescope_events(self, writer: TableWriter, event: ArrayEventContaine [tel_index, event.simulation.tel[tel_id]], ) + def _write_dl2_telescope_events( + self, writer: TableWriter, event: ArrayEventContainer + ): + """ + write per-telescope DL2 shower information. + + Currently this writes to a single table per type of shower + reconstruction and per algorithm, with all telescopes combined. + """ + + subcontainers = ["geometry", "energy", "classification"] + + for tel_id, dl2_tel in event.dl2.tel.items(): + + tel_index = TelEventIndexContainer( + obs_id=event.index.obs_id, + event_id=event.index.event_id, + tel_id=np.int16(tel_id), + ) + + for container_name in subcontainers: + for algorithm, container in dl2_tel[container_name].items(): + writer.write( + table_name=f"dl2/event/mono/{container_name}/{algorithm}", + containers=[tel_index, container], + ) + + def _write_dl2_stereo_event(self, writer: TableWriter, event: ArrayEventContainer): + """ + write per-telescope DL2 shower information to e.g. 
+ `/dl2/event/stereo/{geometry,energy,classification}/` + """ + + subcontainers = ["geometry", "energy", "classification"] + + for container_name in subcontainers: + for algorithm, container in event.dl2.stereo[container_name].items(): + # note this will only write info if the particular algorithm + # generated it (otherwise the algorithm map is empty, and no + # data will be written) + writer.write( + table_name=f"dl2/event/stereo/{container_name}/{algorithm}", + containers=[event.index, container], + ) + def _generate_table_indices(self, h5file, start_node): """ helper to generate PyTables index tabnles for common columns """ for node in h5file.iter_nodes(start_node): diff --git a/ctapipe/io/dl1eventsource.py b/ctapipe/io/dl1eventsource.py index a713f1161e6..7a2cd9c4c94 100644 --- a/ctapipe/io/dl1eventsource.py +++ b/ctapipe/io/dl1eventsource.py @@ -32,7 +32,7 @@ logger = logging.getLogger(__name__) -COMPATIBLE_DL1_VERSIONS = ["v1.0.0", "v1.0.1", "v1.0.2", "v1.0.3", "v1.1.0"] +COMPATIBLE_DL1_VERSIONS = ["v1.0.0", "v1.0.1", "v1.0.2", "v1.0.3", "v1.1.0", "v1.2.0"] class DL1EventSource(EventSource): @@ -126,7 +126,7 @@ def is_compatible(file_path): if "CTA PRODUCT DATA LEVEL" not in metadata._v_attrnames: return False - if metadata["CTA PRODUCT DATA LEVEL"] != "DL1": + if "DL1" not in metadata["CTA PRODUCT DATA LEVEL"]: return False if "CTA PRODUCT DATA MODEL VERSION" not in metadata._v_attrnames: diff --git a/ctapipe/io/metadata.py b/ctapipe/io/metadata.py index 6573908bfb4..a9cbcee9087 100644 --- a/ctapipe/io/metadata.py +++ b/ctapipe/io/metadata.py @@ -63,7 +63,20 @@ class Product(HasTraits): id_ = Unicode(help="leave unspecified to automatically generate a UUID") data_category = Enum(["S", "A", "B", "C", "Other"], "Other") data_level = Enum( - ["R0", "R1", "DL0", "DL1", "DL2", "DL3", "DL4", "DL5", "DL6", "Other"], "Other" + [ + "R0", + "R1", + "DL0", + "DL1", + "DL1,DL2", + "DL2", + "DL3", + "DL4", + "DL5", + "DL6", + "Other", + ], + "Other", ) data_association = Enum(["Subarray", "Telescope", "Target", "Other"], "Other") data_model_name = Unicode("unknown") diff --git a/ctapipe/io/tests/test_dl1writer.py b/ctapipe/io/tests/test_datawriter.py similarity index 71% rename from ctapipe/io/tests/test_dl1writer.py rename to ctapipe/io/tests/test_datawriter.py index 9c1cf390d77..4cccff31b4a 100644 --- a/ctapipe/io/tests/test_dl1writer.py +++ b/ctapipe/io/tests/test_datawriter.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 import numpy as np -from ctapipe.io.dl1writer import DL1Writer, DL1_DATA_MODEL_VERSION +from ctapipe.io.datawriter import DataWriter, DATA_MODEL_VERSION from ctapipe.utils import get_dataset_path from ctapipe.io import EventSource from ctapipe.calib import CameraCalibrator @@ -10,9 +10,26 @@ from copy import deepcopy import tables import logging +from astropy import units as u -def test_dl1writer(tmpdir: Path): +def generate_dummy_dl2(event): + """ generate some dummy DL2 info and see if we can write it """ + + algos = ["Hillas", "ImPACT"] + + for algo in algos: + for tel_id in event.dl1.tel: + event.dl2.tel[tel_id].geometry[algo].alt = 70 * u.deg + event.dl2.tel[tel_id].geometry[algo].az = 120 * u.deg + event.dl2.tel[tel_id].energy[algo].energy = 10 * u.TeV + + event.dl2.stereo.geometry[algo].alt = 72 * u.deg + event.dl2.stereo.geometry[algo].az = 121 * u.deg + event.dl2.stereo.energy[algo].energy = 10 * u.TeV + + +def test_dl1(tmpdir: Path): """ Check that we can write DL1 files @@ -30,7 +47,7 @@ def test_dl1writer(tmpdir: Path): ) calibrate = 
CameraCalibrator(subarray=source.subarray) - with DL1Writer( + with DataWriter( event_source=source, output_path=output_path, write_parameters=False, @@ -57,7 +74,7 @@ def test_dl1writer(tmpdir: Path): h5file.root._v_attrs[ "CTA PRODUCT DATA MODEL VERSION" ] # pylint: disable=protected-access - == DL1_DATA_MODEL_VERSION + == DATA_MODEL_VERSION ) shower = h5file.get_node("/simulation/event/subarray/shower") assert len(shower) > 0 @@ -67,9 +84,9 @@ def test_dl1writer(tmpdir: Path): ) # pylint: disable=protected-access -def test_dl1writer_int(tmpdir: Path): +def test_roundtrip(tmpdir: Path): """ - Check that we can write DL1 files + Check that we can write DL1+DL2 info to files and read them back Parameters ---------- @@ -77,7 +94,7 @@ def test_dl1writer_int(tmpdir: Path): temp directory fixture """ - output_path = Path(tmpdir / "events.dl1.h5") + output_path = Path(tmpdir / "events.DL1DL2.h5") source = EventSource( get_dataset_path("gamma_LaPalma_baseline_20Zd_180Az_prod3b_test.simtel.gz"), max_events=20, @@ -87,7 +104,7 @@ def test_dl1writer_int(tmpdir: Path): events = [] - with DL1Writer( + with DataWriter( event_source=source, output_path=output_path, write_parameters=False, @@ -98,13 +115,16 @@ def test_dl1writer_int(tmpdir: Path): transform_peak_time=True, peak_time_dtype="int16", peak_time_scale=100, - ) as write_dl1: - write_dl1.log.level = logging.DEBUG + write_stereo_shower=True, + write_mono_shower=True, + ) as write: + write.log.level = logging.DEBUG for event in source: calibrate(event) - write_dl1(event) + write(event) + generate_dummy_dl2(event) events.append(deepcopy(event)) - write_dl1.write_simulation_histograms(source) + write.write_simulation_histograms(source) assert output_path.exists() @@ -122,6 +142,15 @@ def test_dl1writer_int(tmpdir: Path): assert images.col("peak_time").dtype == np.int16 assert images.col("image").max() > 0.0 + # check that DL2 info is there + dl2_energy = h5file.get_node("/dl2/event/stereo/energy/ImPACT") + assert np.allclose(dl2_energy.col("energy"), 10) + + dl2_tel_energy = h5file.get_node("/dl2/event/mono/energy/Hillas") + assert np.allclose(dl2_tel_energy.col("energy"), 10) + + assert len(dl2_tel_energy.col("energy")) > len(dl2_energy.col("energy")) + # make sure it is readable by the event source and matches the images for event in EventSource(output_path): diff --git a/ctapipe/reco/HillasReconstructor.py b/ctapipe/reco/HillasReconstructor.py index 2b29055980d..fc1b5502740 100644 --- a/ctapipe/reco/HillasReconstructor.py +++ b/ctapipe/reco/HillasReconstructor.py @@ -10,7 +10,7 @@ InvalidWidthException, TooFewTelescopesException, ) -from ctapipe.containers import ReconstructedShowerContainer +from ctapipe.containers import ReconstructedGeometryContainer from itertools import combinations from ctapipe.coordinates import ( @@ -20,11 +20,7 @@ project_to_ground, MissingFrameAttributeWarning, ) -from astropy.coordinates import ( - SkyCoord, - spherical_to_cartesian, - cartesian_to_spherical, -) +from astropy.coordinates import SkyCoord, spherical_to_cartesian, cartesian_to_spherical import warnings import numpy as np @@ -181,7 +177,7 @@ class are set to np.nan # astropy's coordinates system rotates counter-clockwise. # Apparently we assume it to be clockwise. 
# that's why lon get's a sign - result = ReconstructedShowerContainer( + result = ReconstructedGeometryContainer( alt=lat, az=-lon, core_x=core_pos[0], @@ -233,7 +229,7 @@ def initialize_hillas_planes( focal_length=focal_length, telescope_pointing=pointing ) - cog_coord = SkyCoord(x=moments.x, y=moments.y, frame=camera_frame,) + cog_coord = SkyCoord(x=moments.x, y=moments.y, frame=camera_frame) cog_coord = cog_coord.transform_to(horizon_frame) p2_coord = SkyCoord(x=p2_x, y=p2_y, frame=camera_frame) diff --git a/ctapipe/reco/ImPACT.py b/ctapipe/reco/ImPACT.py index 384e0c4a65b..ea285c8fc25 100644 --- a/ctapipe/reco/ImPACT.py +++ b/ctapipe/reco/ImPACT.py @@ -21,7 +21,7 @@ from ctapipe.image import neg_log_likelihood, mean_poisson_likelihood_gaussian from ctapipe.instrument import get_atmosphere_profile_functions from ctapipe.containers import ( - ReconstructedShowerContainer, + ReconstructedGeometryContainer, ReconstructedEnergyContainer, ) from ctapipe.reco.reco_algorithms import Reconstructor @@ -722,7 +722,7 @@ def predict(self, shower_seed, energy_seed): ) # Create a container class for reconstructed shower - shower_result = ReconstructedShowerContainer() + shower_result = ReconstructedGeometryContainer() # Convert the best fits direction and core to Horizon and ground systems and # copy to the shower container diff --git a/ctapipe/reco/hillas_intersection.py b/ctapipe/reco/hillas_intersection.py index 223667b72fb..c7a4b48633f 100644 --- a/ctapipe/reco/hillas_intersection.py +++ b/ctapipe/reco/hillas_intersection.py @@ -16,7 +16,7 @@ InvalidWidthException, TooFewTelescopesException, ) -from ctapipe.containers import ReconstructedShowerContainer +from ctapipe.containers import ReconstructedGeometryContainer from ctapipe.instrument import get_atmosphere_profile_functions from astropy.coordinates import SkyCoord @@ -57,7 +57,7 @@ class HillasIntersection(Reconstructor): """ atmosphere_profile_name = traits.CaselessStrEnum( - ["paranal",], default_value="paranal", help="name of atmosphere profile to use" + ["paranal"], default_value="paranal", help="name of atmosphere profile to use" ).tag(config=True) weighting = traits.CaselessStrEnum( @@ -171,7 +171,7 @@ def predict(self, hillas_dict, subarray, array_pointing, telescopes_pointings=No nom = SkyCoord(fov_lat=src_x * u.rad, fov_lon=src_y * u.rad, frame=nom_frame) # nom = sky_pos.transform_to(nom_frame) sky_pos = nom.transform_to(array_pointing.frame) - tilt = SkyCoord(x=core_x * u.m, y=core_y * u.m, frame=tilted_frame,) + tilt = SkyCoord(x=core_x * u.m, y=core_y * u.m, frame=tilted_frame) grd = project_to_ground(tilt) x_max = self.reconstruct_xmax( nom.fov_lon, @@ -186,7 +186,7 @@ def predict(self, hillas_dict, subarray, array_pointing, telescopes_pointings=No src_error = np.sqrt(err_x ** 2 + err_y ** 2) - result = ReconstructedShowerContainer( + result = ReconstructedGeometryContainer( alt=sky_pos.altaz.alt.to(u.rad), az=sky_pos.altaz.az.to(u.rad), core_x=grd.x, diff --git a/ctapipe/reco/reco_algorithms.py b/ctapipe/reco/reco_algorithms.py index c3bd7e47988..2c9458979ea 100644 --- a/ctapipe/reco/reco_algorithms.py +++ b/ctapipe/reco/reco_algorithms.py @@ -1,5 +1,5 @@ from ctapipe.core import Component -from ctapipe.containers import ReconstructedShowerContainer +from ctapipe.containers import ReconstructedGeometryContainer __all__ = ["Reconstructor", "TooFewTelescopesException", "InvalidWidthException"] @@ -30,4 +30,4 @@ def predict(self, tels_dict): Standard `RecoShowerGeom` container """ - return ReconstructedShowerContainer() + return 
ReconstructedGeometryContainer() diff --git a/ctapipe/reco/tests/test_ImPACT.py b/ctapipe/reco/tests/test_ImPACT.py index 4990d4f50a5..478418d9d15 100644 --- a/ctapipe/reco/tests/test_ImPACT.py +++ b/ctapipe/reco/tests/test_ImPACT.py @@ -4,7 +4,7 @@ from numpy.testing import assert_allclose from ctapipe.containers import ( - ReconstructedShowerContainer, + ReconstructedGeometryContainer, ReconstructedEnergyContainer, ) from ctapipe.reco.ImPACT import ImPACTReconstructor @@ -143,7 +143,7 @@ def test_image_prediction(self): assert np.sum(pred) != 0 """Then check helper function gives the same answer""" - shower = ReconstructedShowerContainer() + shower = ReconstructedGeometryContainer() shower.is_valid = True shower.alt = 0 * u.deg shower.az = 0 * u.deg diff --git a/ctapipe/tools/stage1.py b/ctapipe/tools/stage1.py index 3002b3c1372..425ef355da4 100644 --- a/ctapipe/tools/stage1.py +++ b/ctapipe/tools/stage1.py @@ -10,8 +10,8 @@ from ..core.traits import Bool, List, classes_with_traits from ..image import ImageCleaner, ImageProcessor from ..image.extractor import ImageExtractor -from ..io import DataLevel, DL1Writer, EventSource, SimTelEventSource -from ..io.dl1writer import DL1_DATA_MODEL_VERSION +from ..io import DataLevel, DataWriter, EventSource, SimTelEventSource +from ..io.datawriter import DATA_MODEL_VERSION class Stage1Tool(Tool): @@ -21,7 +21,7 @@ class Stage1Tool(Tool): """ name = "ctapipe-stage1" - description = __doc__ + f" This currently writes {DL1_DATA_MODEL_VERSION} DL1 data" + description = __doc__ + f" This currently writes {DATA_MODEL_VERSION} DL1 data" examples = """ To process data with all default values: > ctapipe-stage1 --input events.simtel.gz --output events.dl1.h5 --progress @@ -37,7 +37,7 @@ class Stage1Tool(Tool): aliases = { "input": "EventSource.input_url", - "output": "DL1Writer.output_path", + "output": "DataWriter.output_path", "allowed-tels": "EventSource.allowed_tels", "max-events": "EventSource.max_events", "image-cleaner-type": "ImageProcessor.image_cleaner_type", @@ -45,19 +45,19 @@ class Stage1Tool(Tool): flags = { "write-images": ( - {"DL1Writer": {"write_images": True}}, + {"DataWriter": {"write_images": True}}, "store DL1/Event/Telescope images in output", ), "write-parameters": ( - {"DL1Writer": {"write_parameters": True}}, + {"DataWriter": {"write_parameters": True}}, "store DL1/Event/Telescope parameters in output", ), "write-index-tables": ( - {"DL1Writer": {"write_index_tables": True}}, + {"DataWriter": {"write_index_tables": True}}, "generate PyTables index tables for the parameter and image datasets", ), "overwrite": ( - {"DL1Writer": {"overwrite": True}}, + {"DataWriter": {"overwrite": True}}, "Overwrite output file if it exists", ), "progress": ( @@ -67,7 +67,7 @@ class Stage1Tool(Tool): } classes = ( - [CameraCalibrator, DL1Writer, ImageProcessor] + [CameraCalibrator, DataWriter, ImageProcessor] + classes_with_traits(EventSource) + classes_with_traits(ImageCleaner) + classes_with_traits(ImageExtractor) @@ -95,7 +95,7 @@ def setup(self): is_simulation=self.event_source.is_simulation, parent=self, ) - self.write_dl1 = DL1Writer(event_source=self.event_source, parent=self) + self.write_dl1 = DataWriter(event_source=self.event_source, parent=self) # warn if max_events prevents writing the histograms if ( @@ -111,7 +111,7 @@ def setup(self): def _write_processing_statistics(self): """ write out the event selection stats, etc. 
""" - # NOTE: don't remove this, not part of DL1Writer + # NOTE: don't remove this, not part of DataWriter image_stats = self.process_images.check_image.to_table(functions=True) image_stats.write( self.write_dl1.output_path, diff --git a/examples/stage1_config.json b/examples/stage1_config.json index 51476a37786..c17123ebfe1 100644 --- a/examples/stage1_config.json +++ b/examples/stage1_config.json @@ -1,5 +1,5 @@ { - "DL1Writer": { + "DataWriter": { "overwrite": false, "write_images": true, "write_parameters": true, From 35ecb7e07c80352033b09c62178dd1bce4415ad7 Mon Sep 17 00:00:00 2001 From: Karl Kosack Date: Wed, 7 Apr 2021 10:02:36 +0200 Subject: [PATCH 03/33] more general loop over reconstruction containers --- ctapipe/io/datawriter.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/ctapipe/io/datawriter.py b/ctapipe/io/datawriter.py index 911d498a868..b4879270bb8 100644 --- a/ctapipe/io/datawriter.py +++ b/ctapipe/io/datawriter.py @@ -578,8 +578,6 @@ def _write_dl2_telescope_events( reconstruction and per algorithm, with all telescopes combined. """ - subcontainers = ["geometry", "energy", "classification"] - for tel_id, dl2_tel in event.dl2.tel.items(): tel_index = TelEventIndexContainer( @@ -588,8 +586,8 @@ def _write_dl2_telescope_events( tel_id=np.int16(tel_id), ) - for container_name in subcontainers: - for algorithm, container in dl2_tel[container_name].items(): + for container_name, algorithm_map in dl2_tel.items(): + for algorithm, container in algorithm_map.items(): writer.write( table_name=f"dl2/event/mono/{container_name}/{algorithm}", containers=[tel_index, container], @@ -601,10 +599,8 @@ def _write_dl2_stereo_event(self, writer: TableWriter, event: ArrayEventContaine `/dl2/event/stereo/{geometry,energy,classification}/` """ - subcontainers = ["geometry", "energy", "classification"] - - for container_name in subcontainers: - for algorithm, container in event.dl2.stereo[container_name].items(): + for container_name, algorithm_map in event.dl2.stereo.items(): + for algorithm, container in algorithm_map.items(): # note this will only write info if the particular algorithm # generated it (otherwise the algorithm map is empty, and no # data will be written) From b7f89d8ebcb7fcd58106ea959c12cc09f5686ff6 Mon Sep 17 00:00:00 2001 From: Karl Kosack Date: Wed, 7 Apr 2021 10:36:34 +0200 Subject: [PATCH 04/33] allow multiple data levels in reference metadata headers --- ctapipe/io/datawriter.py | 31 ++++++++++++++++++++---- ctapipe/io/metadata.py | 37 ++++++++++++++++------------- ctapipe/io/tests/test_datawriter.py | 5 +++- 3 files changed, 50 insertions(+), 23 deletions(-) diff --git a/ctapipe/io/datawriter.py b/ctapipe/io/datawriter.py index b4879270bb8..c4c7d758fd6 100644 --- a/ctapipe/io/datawriter.py +++ b/ctapipe/io/datawriter.py @@ -24,8 +24,9 @@ from ..io import metadata as meta from ..io.tableio import FixedPointColumnTransform from ..instrument import SubarrayDescription +from ..io.datalevels import DataLevel -__all__ = ["DL1Writer", "DL1_DATA_MODEL_VERSION", "write_reference_metadata_headers"] +__all__ = ["DataWriter", "DATA_MODEL_VERSION", "write_reference_metadata_headers"] tables.parameters.NODE_CACHE_SLOTS = 3000 # fixes problem with too many datasets @@ -39,7 +40,9 @@ PROV = Provenance() -def write_reference_metadata_headers(obs_ids, subarray, writer, is_simulation): +def write_reference_metadata_headers( + obs_ids, subarray, writer, is_simulation, data_levels +): """ Attaches Core Provenence headers to an output HDF5 file. 
Right now this is hard-coded for use with the ctapipe-stage1-process tool @@ -54,15 +57,19 @@ def write_reference_metadata_headers(obs_ids, subarray, writer, is_simulation): SubarrayDescription to get metadata from writer: HDF5TableWriter output + data_levels: List[DataLevel] + list of data levels that were requested/generated + (e.g. from `DataWriter.output_data_levels`) """ activity = PROV.current_activity.provenance + category = "Sim" if is_simulation else "Other" reference = meta.Reference( contact=meta.Contact(name="", email="", organization="CTA Consortium"), product=meta.Product( - description="DL1 Data Product", - data_category="S", # TODO: make this automatically filled - data_level="DL1,DL2", # TODO: make this automatically filled + description="ctapipe Data Product", + data_category=category, + data_level=[l.name for l in data_levels], data_association="Subarray", data_model_name="ASWG", data_model_version=DATA_MODEL_VERSION, @@ -260,10 +267,24 @@ def finish(self): obs_ids=self.event_source.obs_ids, writer=self._writer, is_simulation=self._is_simulation, + data_levels=self.output_data_levels, ) self._writer.close() self._writer = None + @property + def output_data_levels(self): + """ returns a list of data levels requested """ + data_levels = [] + if self.write_images: + data_levels.append(DataLevel.DL1_IMAGES) + if self.write_parameters: + data_levels.append(DataLevel.DL1_PARAMETERS) + if self.write_stereo_shower or self.write_mono_shower: + data_levels.append(DataLevel.DL2) + + return data_levels + def _setup_compression(self): """ setup HDF5 compression""" self._hdf5_filters = tables.Filters( diff --git a/ctapipe/io/metadata.py b/ctapipe/io/metadata.py index a9cbcee9087..9db7392e118 100644 --- a/ctapipe/io/metadata.py +++ b/ctapipe/io/metadata.py @@ -26,7 +26,7 @@ import warnings from collections import OrderedDict -from traitlets import Enum, Unicode, Int, HasTraits, default, Instance +from traitlets import Enum, Unicode, Int, HasTraits, default, Instance, List from ctapipe.core.provenance import _ActivityProvenance from ctapipe.core.traits import AstroTime @@ -44,7 +44,7 @@ from astropy.time import Time -CONVERSIONS = {Time: lambda t: t.utc.iso} +CONVERSIONS = {Time: lambda t: t.utc.iso, list: lambda l: str(l)} class Contact(HasTraits): @@ -61,22 +61,25 @@ class Product(HasTraits): description = Unicode("unknown") creation_time = AstroTime() id_ = Unicode(help="leave unspecified to automatically generate a UUID") - data_category = Enum(["S", "A", "B", "C", "Other"], "Other") - data_level = Enum( - [ - "R0", - "R1", - "DL0", - "DL1", - "DL1,DL2", - "DL2", - "DL3", - "DL4", - "DL5", - "DL6", + data_category = Enum(["Sim", "A", "B", "C", "Other"], "Other") + data_level = List( + Enum( + [ + "R0", + "R1", + "DL0", + "DL1", + "DL1_IMAGES", + "DL1_PARAMETERS", + "DL2", + "DL3", + "DL4", + "DL5", + "DL6", + "Other", + ], "Other", - ], - "Other", + ) ) data_association = Enum(["Subarray", "Telescope", "Target", "Other"], "Other") data_model_name = Unicode("unknown") diff --git a/ctapipe/io/tests/test_datawriter.py b/ctapipe/io/tests/test_datawriter.py index 4cccff31b4a..ed9288d5a12 100644 --- a/ctapipe/io/tests/test_datawriter.py +++ b/ctapipe/io/tests/test_datawriter.py @@ -3,7 +3,7 @@ import numpy as np from ctapipe.io.datawriter import DataWriter, DATA_MODEL_VERSION from ctapipe.utils import get_dataset_path -from ctapipe.io import EventSource +from ctapipe.io import EventSource, DataLevel from ctapipe.calib import CameraCalibrator from pathlib import Path from 
ctapipe.instrument import SubarrayDescription @@ -125,6 +125,9 @@ def test_roundtrip(tmpdir: Path): generate_dummy_dl2(event) events.append(deepcopy(event)) write.write_simulation_histograms(source) + assert DataLevel.DL1_IMAGES in write.output_data_levels + assert DataLevel.DL1_PARAMETERS not in write.output_data_levels + assert DataLevel.DL2 in write.output_data_levels assert output_path.exists() From e2a0d3f231c0d3c1de8a59d473aef6585c8cbe0c Mon Sep 17 00:00:00 2001 From: Karl Kosack Date: Wed, 7 Apr 2021 10:47:04 +0200 Subject: [PATCH 05/33] update docs --- ctapipe/io/__init__.py | 2 +- ctapipe/io/datawriter.py | 4 ++-- docs/data_models/dl1.rst | 37 +++++++++++++++++++++++++++++-------- 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/ctapipe/io/__init__.py b/ctapipe/io/__init__.py index 1e3dc01c9cc..f025c06bfb5 100644 --- a/ctapipe/io/__init__.py +++ b/ctapipe/io/__init__.py @@ -29,5 +29,5 @@ "DL1EventSource", "DataLevel", "read_table", - "DL1Writer", + "DataWriter", ] diff --git a/ctapipe/io/datawriter.py b/ctapipe/io/datawriter.py index c4c7d758fd6..39b64b1b612 100644 --- a/ctapipe/io/datawriter.py +++ b/ctapipe/io/datawriter.py @@ -106,11 +106,11 @@ class DataWriter(Component): .. code-block:: python - with DL1Writer(parent=self) as write_dl1: + with DataWriter(parent=self) as write_data: for event in source: calibrate(event) process_images(event) - write_dl1(event) + write_data(event) """ output_path = Path( diff --git a/docs/data_models/dl1.rst b/docs/data_models/dl1.rst index 2324b883fe1..478dc0872ee 100644 --- a/docs/data_models/dl1.rst +++ b/docs/data_models/dl1.rst @@ -2,17 +2,17 @@ .. currentmodule:: ctapipe.io.containers -============== -DL1 Data Model -============== +================== +CTAPipe Data Model +================== -The DL1 files are HDF5 format files, with the following data set hierarchy. The tables +The files are HDF5 format files, with the following data set hierarchy. The tables should be written with `pytables` (not `h5py`), ideally with the `ctapipe.io.HDF5TableWriter`, which ensures the unit and other descriptive metadata are attached to the output. Containers marked with a `+` should be written without their prefix (all others should use column prefixes). The following describes the contents of data level 1 (DL1) output files -generated by ctapipe (e.g. the `ctapipe-stage1-process` tool which uses the `ctapipe.io.DL1Writer` component to generate output data). +generated by ctapipe (e.g. the `ctapipe-stage1-process` tool which uses the `ctapipe.io.DataWriter` component to generate output data). -------------- @@ -29,9 +29,6 @@ The following datasets will be written to the group `/dl1/event/` in the output * - Group/Dataset - Description - Contents - * - event/subarray - - event-wise data pertaining to a subarray - - (group) * - event/subarray/trigger - subarray trigger information - `EventIndexContainer` +, `CentralTriggerContainer` @@ -45,6 +42,30 @@ The following datasets will be written to the group `/dl1/event/` in the output - tables of telescope images (one per telescope) - `TelEventIndexContainer` +, `DL1CameraContainer` +-------------- +DL2 Data Model +-------------- + +This describes data that change per-event. +The following datasets will be written to the group `/dl2/event/stereo/` and or `/dl2/event/mono/`, one for each reconstruciton algorithm in the output file: + +.. 
list-table::
+   :widths: 25 50 25
+   :header-rows: 1
+
+   * - Group/Dataset
+     - Description
+     - Contents
+   * /geometry
+     - shower geometry reconstruction
+     - `EventIndexContainer`, `ReconstructedGeometryContainer`
+   * /energy
+     - shower energy reconstruction
+     - `EventIndexContainer`, `ReconstructedEnergyContainer`
+   * /classification
+     - shower classification parameters
+     - `EventIndexContainer`, `ParticleClassificationContainer`
+
 ---------------------
 Simulation Data Model
 ---------------------

From 3484a06d2232403a0aaeadf272539ccc1ef6538f Mon Sep 17 00:00:00 2001
From: Karl Kosack
Date: Wed, 7 Apr 2021 10:48:19 +0200
Subject: [PATCH 06/33] minor doc correction

---
 docs/data_models/dl1.rst | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/docs/data_models/dl1.rst b/docs/data_models/dl1.rst
index 478dc0872ee..c2fca4cd6a9 100644
--- a/docs/data_models/dl1.rst
+++ b/docs/data_models/dl1.rst
@@ -46,8 +46,11 @@
 DL2 Data Model
 --------------
 
-This describes data that change per-event.
-The following datasets will be written to the group `/dl2/event/stereo/` and or `/dl2/event/mono/`, one for each reconstruciton algorithm in the output file:
+This describes data that change per-event. The following datasets will be
+written to the group `/dl2/event/stereo/<algorithm>/` and/or
+`/dl2/event/mono/<algorithm>/`, one for each reconstruction algorithm in the
+output file, where `<algorithm>` is the identifier of the algorithm (e.g.
+"Hillas"):
 
 .. list-table::
    :widths: 25 50 25

From 93b98465113e2d922f11fd661d9510aa97d3ec9c Mon Sep 17 00:00:00 2001
From: Karl Kosack
Date: Wed, 7 Apr 2021 11:23:44 +0200
Subject: [PATCH 07/33] update __all__

---
 ctapipe/containers.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/ctapipe/containers.py b/ctapipe/containers.py
index f2a1047552e..397ff131099 100644
--- a/ctapipe/containers.py
+++ b/ctapipe/containers.py
@@ -18,6 +18,7 @@
     "DL1CameraCalibrationContainer",
     "DL1CameraContainer",
     "DL1Container",
+    "DL2Container",
     "EventCalibrationContainer",
     "EventCameraCalibrationContainer",
     "EventIndexContainer",
@@ -38,7 +39,7 @@
     "R1Container",
     "ReconstructedContainer",
     "ReconstructedEnergyContainer",
-    "ReconstructedShowerContainer",
+    "ReconstructedGeometryContainer",
     "SimulatedCameraContainer",
     "SimulatedShowerContainer",
     "SimulatedShowerDistribution",
@@ -526,7 +527,7 @@ class ReconstructedGeometryContainer(Container):
         nan, "average intensity of the intensities used for reconstruction"
     )
     goodness_of_fit = Field(nan, "measure of algorithm success (if fit)")
-    tel_id_list = Field(None, "list of tel_ids used if stereo, or None if Mono")
+    tel_ids = Field(None, "list of tel_ids used if stereo, or None if Mono")
 
 
 class ReconstructedEnergyContainer(Container):
@@ -547,7 +548,7 @@ class ReconstructedEnergyContainer(Container):
         ),
     )
     goodness_of_fit = Field(nan, "goodness of the algorithm fit")
-    tel_id_list = Field(None, "list of tel_ids used if stereo, or None if Mono")
+    tel_ids = Field(None, "list of tel_ids used if stereo, or None if Mono")
 
 
 class ParticleClassificationContainer(Container):
@@ -571,7 +572,7 @@ class ParticleClassificationContainer(Container):
     )
     is_valid = Field(False, "true if classification parameters are valid")
     goodness_of_fit = Field(nan, "goodness of the algorithm fit")
-    tel_id_list = Field(None, "list of tel_ids used if stereo, or None if Mono")
+    tel_ids = Field(None, "list of tel_ids used if stereo, or None if Mono")
 
 
 class ReconstructedContainer(Container):
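
The container hierarchy set up by the patches above is keyed first by mono (`tel`, per telescope) or `stereo`, then by the kind of result (`geometry`, `energy`, `classification`), and finally by algorithm name. A minimal sketch of how a reconstructor would fill and read it, mirroring the dummy-DL2 test earlier in this series; the algorithm name, telescope ids and numeric values are purely illustrative, and a bare `ArrayEventContainer` is assumed to behave like the events delivered by an `EventSource`:

.. code-block:: python

    from astropy import units as u
    from ctapipe.containers import ArrayEventContainer

    event = ArrayEventContainer()

    # per-telescope (mono) result for telescope 1; missing keys in the Map
    # fields are created on first access
    event.dl2.tel[1].geometry["HillasReconstructor"].alt = 70 * u.deg
    event.dl2.tel[1].geometry["HillasReconstructor"].az = 120 * u.deg

    # subarray-wide (stereo) result, recording which telescopes were used
    stereo_geom = event.dl2.stereo.geometry["HillasReconstructor"]
    stereo_geom.alt = 72 * u.deg
    stereo_geom.az = 121 * u.deg
    stereo_geom.tel_ids = [1, 2, 4]
    stereo_geom.is_valid = True

    # writers and other consumers can then loop generically over algorithms
    for algorithm, geometry in event.dl2.stereo.geometry.items():
        print(algorithm, geometry.alt, geometry.az)
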
From 6c23a3043010a6e9c4d140b4d6a0b59021e00ef5 Mon Sep 17 00:00:00 2001 From: Karl Kosack Date: Wed, 7 Apr 2021 11:24:01 +0200 Subject: [PATCH 08/33] fix metadata tests (now data level requires list) --- ctapipe/io/tests/test_metadata.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ctapipe/io/tests/test_metadata.py b/ctapipe/io/tests/test_metadata.py index dcd8a1c85ce..6c9eb98ad9b 100644 --- a/ctapipe/io/tests/test_metadata.py +++ b/ctapipe/io/tests/test_metadata.py @@ -21,8 +21,8 @@ def test_construct_and_write_metadata(tmp_path): product=meta.Product( description="An Amazing Product", creation_time="2020-10-11 15:23:31", - data_category="S", - data_level="DL1", + data_category="Sim", + data_level=["DL1"], data_association="Subarray", data_model_name="Unofficial DL1", data_model_version="1.0", From 1cb55a016a0f56c4a648981f9ccbc5cf95f6eed5 Mon Sep 17 00:00:00 2001 From: Karl Kosack Date: Wed, 7 Apr 2021 11:24:19 +0200 Subject: [PATCH 09/33] fix metadata --- ctapipe/tools/dl1_merge.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/ctapipe/tools/dl1_merge.py b/ctapipe/tools/dl1_merge.py index 06ab38352b2..1a146214d88 100644 --- a/ctapipe/tools/dl1_merge.py +++ b/ctapipe/tools/dl1_merge.py @@ -357,7 +357,9 @@ def start(self): ): if not DL1EventSource.is_compatible(current_file): - self.log.critical(f"input file {current_file} is not a supported DL1 file") + self.log.critical( + f"input file {current_file} is not a supported DL1 file" + ) if self.skip_broken_files: continue else: @@ -407,10 +409,10 @@ def finish(self): contact=meta.Contact(name="", email="", organization="CTA Consortium"), product=meta.Product( description="Merged DL1 Data Product", - data_category="S", - data_level="DL1", + data_category="Sim", # TODO: copy this from the inputs + data_level=["DL1"], # TODO: copy this from inputs data_association="Subarray", - data_model_name="ASWG DL1", + data_model_name="ASWG", # TODO: copy this from inputs data_model_version=self.data_model_version, data_model_url="", format="hdf5", From 166080e53b252767948427e26db7d597675e87b2 Mon Sep 17 00:00:00 2001 From: Karl Kosack Date: Wed, 7 Apr 2021 11:46:06 +0200 Subject: [PATCH 10/33] doc fix --- docs/data_models/dl1.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/data_models/dl1.rst b/docs/data_models/dl1.rst index c2fca4cd6a9..a2e51792578 100644 --- a/docs/data_models/dl1.rst +++ b/docs/data_models/dl1.rst @@ -1,4 +1,4 @@ -.. _dl1_datamodel: +.. _datamodel: .. currentmodule:: ctapipe.io.containers @@ -59,13 +59,13 @@ output file, where `` is the identifier of the algorithm (e.g. 
* - Group/Dataset - Description - Contents - * /geometry + * - /geometry - shower geometry reconstruction - `EventIndexContainer`, `ReconstructedGeometryContainer` - * /energy + * - /energy - shower energy reconstruction - `EventIndexContainer`, `ReconstructedEnergyContainer` - * /classification + * - /classification - shower classification parameters - `EventIndexContainer`, `ParticleClassificationContainer` From db7928323a552431f7c0fef0992c659cddc0d6ec Mon Sep 17 00:00:00 2001 From: Karl Kosack Date: Wed, 7 Apr 2021 11:50:16 +0200 Subject: [PATCH 11/33] another doc fix --- docs/ctapipe_api/io/index.rst | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/ctapipe_api/io/index.rst b/docs/ctapipe_api/io/index.rst index 678c1a534ab..babb73a4d79 100644 --- a/docs/ctapipe_api/io/index.rst +++ b/docs/ctapipe_api/io/index.rst @@ -118,19 +118,19 @@ only supports scalar values). Writing Output Files: ===================== -The `DL1Writer` Component allows one to write a series of events (stored in -`ctapipe.containers.ArrayEventContainer`) to a standardized HDF5 format DL1 file -following the DL1 data model. This includes all related datasets such as the -instrument and simulation configuration information, simulated shower and image -information, and observed images and parameters. It can be used in an event loop -like: +The `DataWriter` Component allows one to write a series of events (stored in +`ctapipe.containers.ArrayEventContainer`) to a standardized HDF5 format file +following the data model (see data_model_). This includes all related datasets +such as the instrument and simulation configuration information, simulated +shower and image information, observed images and parameters and reconstruction +information. It can be used in an event loop like: .. code-block:: python - with DL1Writer(event_source=source, output_path="events.dl1.h5") as write_dl1: + with DataWriter(event_source=source, output_path="events.dl1.h5") as write_data: for event in source: calibrate(event) - write_dl1(event) + write_data(event) Reading Output Tables: ====================== From 132448b06c51a0bdd66fe9e1d4214165246a41a6 Mon Sep 17 00:00:00 2001 From: Karl Kosack Date: Wed, 7 Apr 2021 15:34:06 +0200 Subject: [PATCH 12/33] renamed output_data_level to datalevels as in EventSource --- ctapipe/io/datawriter.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ctapipe/io/datawriter.py b/ctapipe/io/datawriter.py index 39b64b1b612..2d383d134a9 100644 --- a/ctapipe/io/datawriter.py +++ b/ctapipe/io/datawriter.py @@ -59,7 +59,7 @@ def write_reference_metadata_headers( output data_levels: List[DataLevel] list of data levels that were requested/generated - (e.g. from `DataWriter.output_data_levels`) + (e.g. 
from `DataWriter.datalevels`) """ activity = PROV.current_activity.provenance category = "Sim" if is_simulation else "Other" @@ -267,13 +267,13 @@ def finish(self): obs_ids=self.event_source.obs_ids, writer=self._writer, is_simulation=self._is_simulation, - data_levels=self.output_data_levels, + data_levels=self.datalevels, ) self._writer.close() self._writer = None @property - def output_data_levels(self): + def datalevels(self): """ returns a list of data levels requested """ data_levels = [] if self.write_images: From aa58531164fc5a83df747c4c23efe79d22ea4b54 Mon Sep 17 00:00:00 2001 From: Karl Kosack Date: Wed, 7 Apr 2021 17:05:10 +0200 Subject: [PATCH 13/33] fix test --- ctapipe/io/tests/test_datawriter.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ctapipe/io/tests/test_datawriter.py b/ctapipe/io/tests/test_datawriter.py index ed9288d5a12..ab132f4c7a6 100644 --- a/ctapipe/io/tests/test_datawriter.py +++ b/ctapipe/io/tests/test_datawriter.py @@ -125,9 +125,9 @@ def test_roundtrip(tmpdir: Path): generate_dummy_dl2(event) events.append(deepcopy(event)) write.write_simulation_histograms(source) - assert DataLevel.DL1_IMAGES in write.output_data_levels - assert DataLevel.DL1_PARAMETERS not in write.output_data_levels - assert DataLevel.DL2 in write.output_data_levels + assert DataLevel.DL1_IMAGES in write.datalevels + assert DataLevel.DL1_PARAMETERS not in write.datalevels + assert DataLevel.DL2 in write.datalevels assert output_path.exists() From 0567f64649482c0bb78a81dde6b6688c25c56c76 Mon Sep 17 00:00:00 2001 From: Karl Kosack Date: Wed, 7 Apr 2021 17:10:25 +0200 Subject: [PATCH 14/33] change group names to telescope and subarray to match other data models --- ctapipe/io/datawriter.py | 4 ++-- ctapipe/io/tests/test_datawriter.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ctapipe/io/datawriter.py b/ctapipe/io/datawriter.py index 2d383d134a9..ec3efe30660 100644 --- a/ctapipe/io/datawriter.py +++ b/ctapipe/io/datawriter.py @@ -610,7 +610,7 @@ def _write_dl2_telescope_events( for container_name, algorithm_map in dl2_tel.items(): for algorithm, container in algorithm_map.items(): writer.write( - table_name=f"dl2/event/mono/{container_name}/{algorithm}", + table_name=f"dl2/event/telescope/{container_name}/{algorithm}", containers=[tel_index, container], ) @@ -626,7 +626,7 @@ def _write_dl2_stereo_event(self, writer: TableWriter, event: ArrayEventContaine # generated it (otherwise the algorithm map is empty, and no # data will be written) writer.write( - table_name=f"dl2/event/stereo/{container_name}/{algorithm}", + table_name=f"dl2/event/subarray/{container_name}/{algorithm}", containers=[event.index, container], ) diff --git a/ctapipe/io/tests/test_datawriter.py b/ctapipe/io/tests/test_datawriter.py index ab132f4c7a6..5d9e38a85aa 100644 --- a/ctapipe/io/tests/test_datawriter.py +++ b/ctapipe/io/tests/test_datawriter.py @@ -146,10 +146,10 @@ def test_roundtrip(tmpdir: Path): assert images.col("image").max() > 0.0 # check that DL2 info is there - dl2_energy = h5file.get_node("/dl2/event/stereo/energy/ImPACT") + dl2_energy = h5file.get_node("/dl2/event/subarray/energy/ImPACT") assert np.allclose(dl2_energy.col("energy"), 10) - dl2_tel_energy = h5file.get_node("/dl2/event/mono/energy/Hillas") + dl2_tel_energy = h5file.get_node("/dl2/event/telescope/energy/Hillas") assert np.allclose(dl2_tel_energy.col("energy"), 10) assert len(dl2_tel_energy.col("energy")) > len(dl2_energy.col("energy")) From a5de494999b6e716edc3bbe152166aace6b91b27 Mon Sep 
17 00:00:00 2001 From: Karl Kosack Date: Wed, 7 Apr 2021 17:19:31 +0200 Subject: [PATCH 15/33] also test classification --- ctapipe/io/tests/test_datawriter.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/ctapipe/io/tests/test_datawriter.py b/ctapipe/io/tests/test_datawriter.py index 5d9e38a85aa..34ac4a00309 100644 --- a/ctapipe/io/tests/test_datawriter.py +++ b/ctapipe/io/tests/test_datawriter.py @@ -23,10 +23,13 @@ def generate_dummy_dl2(event): event.dl2.tel[tel_id].geometry[algo].alt = 70 * u.deg event.dl2.tel[tel_id].geometry[algo].az = 120 * u.deg event.dl2.tel[tel_id].energy[algo].energy = 10 * u.TeV + event.dl2.tel[tel_id].classification[algo].prediction = 0.9 - event.dl2.stereo.geometry[algo].alt = 72 * u.deg - event.dl2.stereo.geometry[algo].az = 121 * u.deg - event.dl2.stereo.energy[algo].energy = 10 * u.TeV + event.dl2.stereo.geometry[algo].alt = 72 * u.deg + event.dl2.stereo.geometry[algo].az = 121 * u.deg + event.dl2.stereo.geometry[algo].tel_ids = [1, 4, 5] + event.dl2.stereo.energy[algo].energy = 10 * u.TeV + event.dl2.stereo.classification[algo].prediction = 0.9 def test_dl1(tmpdir: Path): From 8391d5e871dc975c03afe698da5660e2bf70991f Mon Sep 17 00:00:00 2001 From: Karl Kosack Date: Wed, 7 Apr 2021 18:55:49 +0200 Subject: [PATCH 16/33] added Transform for variable list of tel ids to fixed-length mask --- ctapipe/io/tableio.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/ctapipe/io/tableio.py b/ctapipe/io/tableio.py index 4f28dd7a97a..51308df89e6 100644 --- a/ctapipe/io/tableio.py +++ b/ctapipe/io/tableio.py @@ -1,3 +1,4 @@ +from ctapipe.instrument.subarray import SubarrayDescription import re from abc import ABCMeta, abstractmethod from collections import defaultdict @@ -312,3 +313,25 @@ def inverse(self, value): def get_meta(self, colname): return {f"{colname}_TRANSFORM": "enum", f"{colname}_ENUM": self.enum} + + +class TelListToMaskTransform(ColumnTransform): + """ convert variable-length list of tel_ids to a fixed-length mask """ + + def __init__(self, subarray: SubarrayDescription): + self._forward = subarray.tel_ids_to_mask + self._inverse = subarray.tel_mask_to_tel_ids + + def __call__(self, value): + if value is None: + return None + + return self._forward(value) + + def inverse(self, value): + if value is None: + return None + return self._inverse(value) + + def get_meta(self, colname): + return {f"{colname}_TRANSFORM": "tel_list_to_mask"} From 92de75e9a941eb71c953da258cfccf8ebd8d8f60 Mon Sep 17 00:00:00 2001 From: Karl Kosack Date: Wed, 7 Apr 2021 18:56:07 +0200 Subject: [PATCH 17/33] properly transform the tel_ids list to a mask --- ctapipe/io/datawriter.py | 41 ++++++++++++++++++++++++++--- ctapipe/io/tests/test_datawriter.py | 12 +++++---- 2 files changed, 45 insertions(+), 8 deletions(-) diff --git a/ctapipe/io/datawriter.py b/ctapipe/io/datawriter.py index ec3efe30660..3840726b926 100644 --- a/ctapipe/io/datawriter.py +++ b/ctapipe/io/datawriter.py @@ -16,13 +16,14 @@ ArrayEventContainer, SimulatedShowerDistribution, TelEventIndexContainer, + ReconstructedContainer, ) from ..core import Component, Container, Field, Provenance, ToolConfigurationError from ..core.traits import Bool, CaselessStrEnum, Int, Path, Float, Unicode from ..io import EventSource, HDF5TableWriter, TableWriter from ..io.simteleventsource import SimTelEventSource from ..io import metadata as meta -from ..io.tableio import FixedPointColumnTransform +from ..io.tableio import FixedPointColumnTransform, TelListToMaskTransform from 
..instrument import SubarrayDescription from ..io.datalevels import DataLevel @@ -258,7 +259,7 @@ def setup(self): def finish(self): """ called after all events are done """ - self.log.info("Finishing DL1 output") + self.log.info("Finishing output") if self._writer: if self.write_index_tables: self._generate_indices() @@ -333,10 +334,12 @@ def _setup_writer(self): filters=self._hdf5_filters, ) + tr_tel_list_to_mask = TelListToMaskTransform(self._subarray) + writer.add_column_transform( table_name="dl1/event/subarray/trigger", col_name="tels_with_trigger", - transform=self._subarray.tel_ids_to_mask, + transform=tr_tel_list_to_mask, ) # exclude some columns that are not writable @@ -410,6 +413,38 @@ def _setup_writer(self): for table_name in table_names_tel_id: writer.exclude(f"/dl1/monitoring/event/pointing/{table_name}", "event_type") + # set the transforms for the tel_lists in the DL2 reconstructed + # parameters. This requires looping over not only telescopes, but also + # potential Algorithm names. + # + # TODO: To reduce circular dependencies, the algorithm names are + # currently hard-coded, but a future refactoring could be to dynamically + # skip these columns on the first written event, or to modify + # TableWriter.add_column_transform to accept a table *pattern* (regexp) + # instead of a name, otherwise this will break if we add another + # Reconstructor + + table_names_reco_algorithms = [ + "HillasReconstructor", + "HillasIntersection", + "ImPACTReconstructor", + ] + + for dataset_name in ReconstructedContainer().keys(): + for table_name in table_names_reco_algorithms: + writer.add_column_transform( + table_name=f"dl2/event/telescope/{dataset_name}/{table_name}", + col_name="tel_ids", + transform=tr_tel_list_to_mask, + ) + writer.add_column_transform( + table_name=f"dl2/event/subarray/{dataset_name}/{table_name}", + col_name="tel_ids", + transform=tr_tel_list_to_mask, + ) + + # final initialization + self._writer = writer self.log.debug("Writer initialized: %s", self._writer) diff --git a/ctapipe/io/tests/test_datawriter.py b/ctapipe/io/tests/test_datawriter.py index 34ac4a00309..5f7b30bc1e1 100644 --- a/ctapipe/io/tests/test_datawriter.py +++ b/ctapipe/io/tests/test_datawriter.py @@ -16,7 +16,7 @@ def generate_dummy_dl2(event): """ generate some dummy DL2 info and see if we can write it """ - algos = ["Hillas", "ImPACT"] + algos = ["HillasReconstructor", "ImPACTReconstructor"] for algo in algos: for tel_id in event.dl1.tel: @@ -27,7 +27,7 @@ def generate_dummy_dl2(event): event.dl2.stereo.geometry[algo].alt = 72 * u.deg event.dl2.stereo.geometry[algo].az = 121 * u.deg - event.dl2.stereo.geometry[algo].tel_ids = [1, 4, 5] + event.dl2.stereo.energy[algo].tel_ids = [1, 4, 5] event.dl2.stereo.energy[algo].energy = 10 * u.TeV event.dl2.stereo.classification[algo].prediction = 0.9 @@ -149,12 +149,14 @@ def test_roundtrip(tmpdir: Path): assert images.col("image").max() > 0.0 # check that DL2 info is there - dl2_energy = h5file.get_node("/dl2/event/subarray/energy/ImPACT") + dl2_energy = h5file.get_node("/dl2/event/subarray/energy/ImPACTReconstructor") assert np.allclose(dl2_energy.col("energy"), 10) + assert np.count_nonzero(dl2_energy.col("tel_ids")[0]) == 3 - dl2_tel_energy = h5file.get_node("/dl2/event/telescope/energy/Hillas") + dl2_tel_energy = h5file.get_node( + "/dl2/event/telescope/energy/HillasReconstructor" + ) assert np.allclose(dl2_tel_energy.col("energy"), 10) - assert len(dl2_tel_energy.col("energy")) > len(dl2_energy.col("energy")) # make sure it is readable by the event 
source and matches the images From 14bd56e5ce7b50c9a4e5753fcbaf90682a3a204c Mon Sep 17 00:00:00 2001 From: Karl Kosack Date: Tue, 27 Apr 2021 15:21:18 +0200 Subject: [PATCH 18/33] Fix test after merge --- ctapipe/io/tests/test_datawriter.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ctapipe/io/tests/test_datawriter.py b/ctapipe/io/tests/test_datawriter.py index fc0165ca189..cf90700f17b 100644 --- a/ctapipe/io/tests/test_datawriter.py +++ b/ctapipe/io/tests/test_datawriter.py @@ -192,14 +192,14 @@ def test_dl1writer_no_events(tmpdir: Path): assert source.file_.histograms is not None - with DL1Writer( + with DataWriter( event_source=source, output_path=output_path, write_parameters=True, write_images=True, - ) as write_dl1: - write_dl1.log.level = logging.DEBUG - write_dl1.write_simulation_histograms(source) + ) as writer: + writer.log.level = logging.DEBUG + writer.write_simulation_histograms(source) assert output_path.exists() From f3e4b068e9a7b809680407fd8365e6bcb550dba0 Mon Sep 17 00:00:00 2001 From: Karl Kosack Date: Wed, 12 May 2021 14:40:52 +0200 Subject: [PATCH 19/33] fix test (telid 5 was not in event source) --- ctapipe/io/tests/test_datawriter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ctapipe/io/tests/test_datawriter.py b/ctapipe/io/tests/test_datawriter.py index cf90700f17b..92c774d23c7 100644 --- a/ctapipe/io/tests/test_datawriter.py +++ b/ctapipe/io/tests/test_datawriter.py @@ -27,7 +27,7 @@ def generate_dummy_dl2(event): event.dl2.stereo.geometry[algo].alt = 72 * u.deg event.dl2.stereo.geometry[algo].az = 121 * u.deg - event.dl2.stereo.energy[algo].tel_ids = [1, 4, 5] + event.dl2.stereo.energy[algo].tel_ids = [1, 2, 4] event.dl2.stereo.energy[algo].energy = 10 * u.TeV event.dl2.stereo.classification[algo].prediction = 0.9 From fe70894cf7e35ce37eb32b790e9a25c69337ded2 Mon Sep 17 00:00:00 2001 From: Karl Kosack Date: Wed, 12 May 2021 18:29:31 +0200 Subject: [PATCH 20/33] Allow regexp in table name for TableWriter.exclude() (#1717) * Allow regexp in table name for TableWriter.exclude() * updated docstring --- ctapipe/io/tableio.py | 25 ++++++++++++--------- ctapipe/io/tests/test_hdf5.py | 42 ++++++++++++++++++++++++++++++++++- 2 files changed, 55 insertions(+), 12 deletions(-) diff --git a/ctapipe/io/tableio.py b/ctapipe/io/tableio.py index 4b0679650d9..86ac456852d 100644 --- a/ctapipe/io/tableio.py +++ b/ctapipe/io/tableio.py @@ -43,24 +43,27 @@ def __enter__(self): def __exit__(self, exc_type, exc_val, exc_tb): self.close() - def exclude(self, table_name, pattern): - """ - Exclude any columns (Fields) matching the pattern from being written + def exclude(self, table_regexp, pattern): + """Exclude any columns (Fields) matching the pattern from being written. The + patterns are matched with re.fullmatch. 
Parameters ---------- - table_name: str - name of table on which to apply the exclusion + table_regexp: str + regexp matching to match table name to apply exclusion pattern: str - regular expression string to match column name + regular expression string to match column name in the table + """ - table_name = table_name.lstrip("/") - self._exclusions[table_name].append(re.compile(pattern)) + table_regexp = table_regexp.lstrip("/") + self._exclusions[table_regexp].append(re.compile(pattern)) def _is_column_excluded(self, table_name, col_name): - for pattern in self._exclusions[table_name]: - if pattern.fullmatch(col_name): - return True + for table_regexp, col_regexp_list in self._exclusions.items(): + if re.fullmatch(table_regexp, table_name): + for col_regexp in col_regexp_list: + if col_regexp.fullmatch(col_name): + return True return False def add_column_transform(self, table_name, col_name, transform): diff --git a/ctapipe/io/tests/test_hdf5.py b/ctapipe/io/tests/test_hdf5.py index a383c8ec39b..054091fd171 100644 --- a/ctapipe/io/tests/test_hdf5.py +++ b/ctapipe/io/tests/test_hdf5.py @@ -40,7 +40,6 @@ def test_write_container(temp_h5_file): with HDF5TableWriter( temp_h5_file, group_name="R0", filters=tables.Filters(complevel=7) ) as writer: - writer.exclude("tel_002", ".*samples") # test exclusion of columns for ii in range(100): r0tel.waveform[:] = np.random.uniform(size=(50, 10)) @@ -586,6 +585,47 @@ def create_stream(n_event): assert isinstance(data.event_type, WithIntEnum.EventType) +def test_column_exclusions(tmp_path): + """test if we can exclude columns using regexps for the table and column name""" + tmp_file = tmp_path / "test_column_exclusions.hdf5" + + class SomeContainer(Container): + container_prefix = "" + hillas_x = Field(None) + hillas_y = Field(None) + impact_x = Field(None) + impact_y = Field(None) + + cont = SomeContainer(hillas_x=10, hillas_y=10, impact_x=15, impact_y=15) + + with HDF5TableWriter(tmp_file) as writer: + + # don't write hillas columns for any table + writer.exclude(".*table", "hillas_.*") + + # exclude a specific column + writer.exclude("data/anothertable", "impact_x") + print(writer._exclusions) + + writer.write("data/mytable", cont) + writer.write("data/anothertable", cont) + + # check that we get back the transformed values (note here a round trip will + # not work, as there is no inverse transform in this test) + with HDF5TableReader(tmp_file, mode="r") as reader: + data = next(reader.read("/data/mytable", SomeContainer())) + assert data.hillas_x is None + assert data.hillas_y is None + assert data.impact_x == 15 + assert data.impact_y == 15 + + data = next(reader.read("/data/anothertable", SomeContainer())) + assert data.hillas_x is None + assert data.hillas_y is None + assert data.impact_x is None + assert data.impact_y == 15 + + def test_column_transforms(tmp_path): """ ensure a user-added column transform is applied """ from ctapipe.containers import NAN_TIME From d47b8f87f9c33dee014bb1ebafd25c475b812649 Mon Sep 17 00:00:00 2001 From: Karl Kosack Date: Wed, 12 May 2021 19:46:26 +0200 Subject: [PATCH 21/33] clean up output and setup - setup now uses regexps for exclusions and transforms, much simpler - output is now split like DL1 for telescope/mono output --- ctapipe/io/datawriter.py | 142 +++++++++++----------------- ctapipe/io/tests/test_datawriter.py | 6 +- 2 files changed, 57 insertions(+), 91 deletions(-) diff --git a/ctapipe/io/datawriter.py b/ctapipe/io/datawriter.py index 6ce28ba0783..5380eafd46f 100644 --- a/ctapipe/io/datawriter.py 
+++ b/ctapipe/io/datawriter.py @@ -337,106 +337,67 @@ def _setup_writer(self): transform=tr_tel_list_to_mask, ) - # exclude some columns that are not writable + # avoid some warnings about unwritable columns (which here are just sub-containers) writer.exclude("dl1/event/subarray/trigger", "tel") writer.exclude("dl1/monitoring/subarray/pointing", "tel") + writer.exclude(f"/dl1/event/telescope/images/.*", "parameters") + + # currently the trigger info is used for the event time, but we dont' + # want the other bits of the trigger container in the pointing or other + # montitoring containers writer.exclude("dl1/monitoring/subarray/pointing", "event_type") writer.exclude("dl1/monitoring/subarray/pointing", "tels_with_trigger") writer.exclude("dl1/monitoring/subarray/pointing", "n_trigger_pixels") writer.exclude("/dl1/event/telescope/trigger", "trigger_pixels") + writer.exclude("/dl1/monitoring/telescope/pointing/.*", "n_trigger_pixels") + writer.exclude("/dl1/monitoring/telescope/pointing/.*", "trigger_pixels") + writer.exclude("/dl1/monitoring/event/pointing/.*", "event_type") - table_names_tel_id = [f"tel_{tel_id:03d}" for tel_id in self._subarray.tel] - table_names_tel_type = [ - str(telescope) for telescope in self._subarray.telescope_types - ] + if self.write_parameters is False: + writer.exclude(r"/dl1/event/telescope/images/.*", "image_mask") - if self.split_datasets_by == "tel_id": - table_names = table_names_tel_id - elif self.split_datasets_by == "tel_type": - table_names = table_names_tel_type - else: - table_names = [] + if self._is_simulation: + writer.exclude(r"/simulation/event/telescope/images/.*", "true_parameters") + # no timing information yet for true images + writer.exclude( + f"/simulation/event/telescope/parameters/.*", r"peak_time_.*" + ) + writer.exclude(r"/simulation/event/telescope/parameters/.*", r"timing_.*") + writer.exclude("/simulation/event/subarray/shower", "true_tel") - for table_name in table_names: - if self.write_parameters is False: - writer.exclude( - f"/dl1/event/telescope/images/{table_name}", "image_mask" - ) + # Set up transforms - tel_pointing = f"/dl1/monitoring/telescope/pointing/{table_name}" - writer.exclude(tel_pointing, "n_trigger_pixels") - writer.exclude(tel_pointing, "trigger_pixels") - writer.exclude(f"/dl1/event/telescope/images/{table_name}", "parameters") - - if self.transform_image: - tr = FixedPointColumnTransform( - scale=self.image_scale, - offset=self.image_offset, - source_dtype=np.float32, - target_dtype=np.dtype(self.image_dtype), - ) - writer.add_column_transform( - f"dl1/event/telescope/images/{table_name}", "image", tr - ) + if self.transform_image: + tr = FixedPointColumnTransform( + scale=self.image_scale, + offset=self.image_offset, + source_dtype=np.float32, + target_dtype=np.dtype(self.image_dtype), + ) + writer.add_column_transform_regexp( + f"dl1/event/telescope/images/.*", "image", tr + ) - if self.transform_peak_time: - tr = FixedPointColumnTransform( - scale=self.peak_time_scale, - offset=self.peak_time_offset, - source_dtype=np.float32, - target_dtype=np.dtype(self.peak_time_dtype), - ) - writer.add_column_transform( - f"dl1/event/telescope/images/{table_name}", "peak_time", tr - ) + if self.transform_peak_time: + tr = FixedPointColumnTransform( + scale=self.peak_time_scale, + offset=self.peak_time_offset, + source_dtype=np.float32, + target_dtype=np.dtype(self.peak_time_dtype), + ) + writer.add_column_transform_regexp( + f"dl1/event/telescope/images/.*", "peak_time", tr + ) - if self._is_simulation: - 
writer.exclude( - f"/simulation/event/telescope/images/{table_name}", - "true_parameters", - ) - # no timing information yet for true images - writer.exclude( - f"/simulation/event/telescope/parameters/{table_name}", - r"peak_time_.*", - ) - writer.exclude( - f"/simulation/event/telescope/parameters/{table_name}", r"timing_.*" - ) - writer.exclude("/simulation/event/subarray/shower", "true_tel") - - for table_name in table_names_tel_id: - writer.exclude(f"/dl1/monitoring/event/pointing/{table_name}", "event_type") - - # set the transforms for the tel_lists in the DL2 reconstructed - # parameters. This requires looping over not only telescopes, but also - # potential Algorithm names. - # - # TODO: To reduce circular dependencies, the algorithm names are - # currently hard-coded, but a future refactoring could be to dynamically - # skip these columns on the first written event, or to modify - # TableWriter.add_column_transform to accept a table *pattern* (regexp) - # instead of a name, otherwise this will break if we add another - # Reconstructor - - table_names_reco_algorithms = [ - "HillasReconstructor", - "HillasIntersection", - "ImPACTReconstructor", - ] - - for dataset_name in ReconstructedContainer().keys(): - for table_name in table_names_reco_algorithms: - writer.add_column_transform( - table_name=f"dl2/event/telescope/{dataset_name}/{table_name}", - col_name="tel_ids", - transform=tr_tel_list_to_mask, - ) - writer.add_column_transform( - table_name=f"dl2/event/subarray/{dataset_name}/{table_name}", - col_name="tel_ids", - transform=tr_tel_list_to_mask, - ) + # set up DL2 transforms: + # - the single-tel output has no list of tel_ids + # - the stereo output tel_ids list needs to be transformed to a pattern + writer.exclude("dl2/event/telescope/.*", "tel_ids") + writer.add_column_transform_regexp( + table_regexp=f"dl2/event/subarray/.*", + col_regexp="tel_ids", + transform=tr_tel_list_to_mask, + ) # final initialization @@ -630,6 +591,9 @@ def _write_dl2_telescope_events( for tel_id, dl2_tel in event.dl2.tel.items(): + telescope = self._subarray.tel[tel_id] + table_name = self.table_name(tel_id, str(telescope)) + tel_index = TelEventIndexContainer( obs_id=event.index.obs_id, event_id=event.index.event_id, @@ -639,7 +603,7 @@ def _write_dl2_telescope_events( for container_name, algorithm_map in dl2_tel.items(): for algorithm, container in algorithm_map.items(): writer.write( - table_name=f"dl2/event/telescope/{container_name}/{algorithm}", + table_name=f"dl2/event/telescope/{container_name}/{algorithm}/{table_name}", containers=[tel_index, container], ) diff --git a/ctapipe/io/tests/test_datawriter.py b/ctapipe/io/tests/test_datawriter.py index 92c774d23c7..cf79de00366 100644 --- a/ctapipe/io/tests/test_datawriter.py +++ b/ctapipe/io/tests/test_datawriter.py @@ -27,9 +27,11 @@ def generate_dummy_dl2(event): event.dl2.stereo.geometry[algo].alt = 72 * u.deg event.dl2.stereo.geometry[algo].az = 121 * u.deg + event.dl2.stereo.geometry[algo].tel_ids = [1, 2, 4] event.dl2.stereo.energy[algo].tel_ids = [1, 2, 4] event.dl2.stereo.energy[algo].energy = 10 * u.TeV event.dl2.stereo.classification[algo].prediction = 0.9 + event.dl2.stereo.classification[algo].tel_ids = [1, 2, 4] def test_dl1(tmpdir: Path): @@ -154,10 +156,10 @@ def test_roundtrip(tmpdir: Path): assert np.count_nonzero(dl2_energy.col("tel_ids")[0]) == 3 dl2_tel_energy = h5file.get_node( - "/dl2/event/telescope/energy/HillasReconstructor" + "/dl2/event/telescope/energy/HillasReconstructor/tel_001" ) assert 
np.allclose(dl2_tel_energy.col("energy"), 10) - assert len(dl2_tel_energy.col("energy")) > len(dl2_energy.col("energy")) + assert "tel_ids" not in dl2_tel_energy # make sure it is readable by the event source and matches the images From 497415bce81752efe410d469b068fd5937af3adb Mon Sep 17 00:00:00 2001 From: Karl Kosack Date: Wed, 12 May 2021 22:23:40 +0200 Subject: [PATCH 22/33] fixed doc reference to old DL1Writer --- docs/data_models/dl1.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data_models/dl1.rst b/docs/data_models/dl1.rst index 4f8f90b1d60..0d9223979e6 100644 --- a/docs/data_models/dl1.rst +++ b/docs/data_models/dl1.rst @@ -14,7 +14,7 @@ Containers marked with a ``+`` should be written without their prefix (all other The following describes the contents of data level 1 (DL1) output files generated by ctapipe (e.g. the ``ctapipe-stage1-process`` tool which uses the -:py:class:`~ctapipe.io.DL1Writer` component to generate output data). +:py:class:`~ctapipe.io.DataWriter` component to generate output data). -------------- From 8cb7983f1693384c3e309a0736bb3228055882e9 Mon Sep 17 00:00:00 2001 From: Karl Kosack Date: Wed, 12 May 2021 22:39:19 +0200 Subject: [PATCH 23/33] fixed broken link --- docs/ctapipe_api/io/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ctapipe_api/io/index.rst b/docs/ctapipe_api/io/index.rst index ce12eeb3147..19eea9f62f4 100644 --- a/docs/ctapipe_api/io/index.rst +++ b/docs/ctapipe_api/io/index.rst @@ -120,7 +120,7 @@ Writing Output Files: The `DataWriter` Component allows one to write a series of events (stored in `ctapipe.containers.ArrayEventContainer`) to a standardized HDF5 format file -following the data model (see data_model_). This includes all related datasets +following the data model (see datamodels_). This includes all related datasets such as the instrument and simulation configuration information, simulated shower and image information, observed images and parameters and reconstruction information. It can be used in an event loop like: From 22f498aa75af4c46eb398bd9534c89cf181a31ba Mon Sep 17 00:00:00 2001 From: Karl Kosack Date: Wed, 12 May 2021 22:50:41 +0200 Subject: [PATCH 24/33] more doc fixes --- docs/ctapipe_api/io/index.rst | 2 +- docs/data_models/dl1.rst | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/ctapipe_api/io/index.rst b/docs/ctapipe_api/io/index.rst index 19eea9f62f4..715f3c2c05d 100644 --- a/docs/ctapipe_api/io/index.rst +++ b/docs/ctapipe_api/io/index.rst @@ -120,7 +120,7 @@ Writing Output Files: The `DataWriter` Component allows one to write a series of events (stored in `ctapipe.containers.ArrayEventContainer`) to a standardized HDF5 format file -following the data model (see datamodels_). This includes all related datasets +following the data model (see :ref:`datamodels`). This includes all related datasets such as the instrument and simulation configuration information, simulated shower and image information, observed images and parameters and reconstruction information. It can be used in an event loop like: diff --git a/docs/data_models/dl1.rst b/docs/data_models/dl1.rst index 0d9223979e6..4a18c7a4ef5 100644 --- a/docs/data_models/dl1.rst +++ b/docs/data_models/dl1.rst @@ -65,13 +65,13 @@ output file, where `` is the identifier of the algorithm (e.g. 
- Contents * - /geometry - shower geometry reconstruction - - `EventIndexContainer`, `ReconstructedGeometryContainer` + - :py:class:`~ctapipe.containers.EventIndexContainer`, :py:class:`~ctapipe.containers.ReconstructedGeometryContainer` * - /energy - shower energy reconstruction - - `EventIndexContainer`, `ReconstructedEnergyContainer` + - :py:class:`~ctapipe.containers.EventIndexContainer`, :py:class:`~ctapipe.containers.ReconstructedEnergyContainer` * - /classification - shower classification parameters - - `EventIndexContainer`, `ParticleClassificationContainer` + - :py:class:`~ctapipe.containers.EventIndexContainer`, :py:class:`~ctapipe.containers.ParticleClassificationContainer` --------------------- Simulation Data Model From 3384956aaf655d2e226b3c1cd2509c7874897a03 Mon Sep 17 00:00:00 2001 From: Karl Kosack Date: Wed, 12 May 2021 23:00:09 +0200 Subject: [PATCH 25/33] and a few more bad doc links fix some style warnings and optimize imports fix bug introduced in last commit (overwrote existing variable) fixed a bunch of pyflakes warnings - made HDF5TableWriter._h5file public - optimized some imports - changed some log statements to use lazy logging instead of f-strings --- ctapipe/containers.py | 6 +- ctapipe/io/datawriter.py | 87 ++++++++++++++--------------- ctapipe/io/hdf5tableio.py | 12 ++-- ctapipe/io/metadata.py | 17 +++--- ctapipe/io/tests/test_datawriter.py | 17 +++--- ctapipe/io/tests/test_hdf5.py | 8 +-- ctapipe/reco/reco_algorithms.py | 2 +- ctapipe/tools/dl1_merge.py | 2 +- docs/data_models/dl1.rst | 6 +- 9 files changed, 77 insertions(+), 80 deletions(-) diff --git a/ctapipe/containers.py b/ctapipe/containers.py index 6e5cb22c23d..8ded1c939eb 100644 --- a/ctapipe/containers.py +++ b/ctapipe/containers.py @@ -604,9 +604,9 @@ class ReconstructedContainer(Container): class DL2Container(Container): - """ - Reconstructed Shower information for a given reconstruction algorithm, - including optionally both per-telescope mono reconstruction and per-shower stereo reconstructions + """Reconstructed Shower information for a given reconstruction algorithm, + including optionally both per-telescope mono reconstruction and per-shower + stereo reconstructions """ tel = Field( diff --git a/ctapipe/io/datawriter.py b/ctapipe/io/datawriter.py index 5380eafd46f..088de21be69 100644 --- a/ctapipe/io/datawriter.py +++ b/ctapipe/io/datawriter.py @@ -16,16 +16,15 @@ ArrayEventContainer, SimulatedShowerDistribution, TelEventIndexContainer, - ReconstructedContainer, ) from ..core import Component, Container, Field, Provenance, ToolConfigurationError -from ..core.traits import Bool, CaselessStrEnum, Int, Path, Float, Unicode +from ..core.traits import Bool, CaselessStrEnum, Float, Int, Path, Unicode +from ..instrument import SubarrayDescription from ..io import EventSource, HDF5TableWriter, TableWriter -from ..io.simteleventsource import SimTelEventSource from ..io import metadata as meta -from ..io.tableio import FixedPointColumnTransform, TelListToMaskTransform -from ..instrument import SubarrayDescription from ..io.datalevels import DataLevel +from ..io.simteleventsource import SimTelEventSource +from ..io.tableio import FixedPointColumnTransform, TelListToMaskTransform __all__ = ["DataWriter", "DATA_MODEL_VERSION", "write_reference_metadata_headers"] @@ -80,7 +79,7 @@ def write_reference_metadata_headers( process=meta.Process( type_="Simulation" if is_simulation else "Observation", subtype="", - id_=int(min(obs_ids)), # FIXME: hack, proper id needs to be defined + id_=",".join(obs_ids), ), 
activity=meta.Activity.from_provenance(activity), instrument=meta.Instrument( @@ -92,9 +91,8 @@ def write_reference_metadata_headers( ), ) - # TODO: add activity_stop_time? headers = reference.to_dict() - meta.write_to_hdf5(headers, writer._h5file) + meta.write_to_hdf5(headers, writer.h5file) class DataWriter(Component): @@ -114,6 +112,8 @@ class DataWriter(Component): write_data(event) """ + # pylint: disable=too-many-instance-attributes + output_path = Path( help="output filename", default_value=pathlib.Path("events.dl1.h5") ).tag(config=True) @@ -202,7 +202,7 @@ def __init__(self, event_source: EventSource, config=None, parent=None, **kwargs self._hdf5_filters = None self._last_pointing_tel: DefaultDict[Tuple] = None self._last_pointing: Tuple = None - self._writer: TableWriter = None + self._writer: HDF5TableWriter = None self._setup_output_path() self._subarray.to_hdf(self.output_path) # must be first (uses astropy io) @@ -229,7 +229,7 @@ def __call__(self, event: ArrayEventContainer): # Write subarray event data self._write_subarray_pointing(event, writer=self._writer) - self.log.debug(f"WRITING EVENT {event.index}") + self.log.debug("WRITING EVENT %s", event.index) self._writer.write( table_name="dl1/event/subarray/trigger", containers=[event.index, event.trigger], @@ -298,7 +298,7 @@ def _setup_output_path(self): self.output_path = self.output_path.expanduser() if self.output_path.exists(): if self.overwrite: - self.log.warning(f"Overwriting {self.output_path}") + self.log.warning("Overwriting %s", self.output_path) self.output_path.unlink() else: raise ToolConfigurationError( @@ -337,10 +337,11 @@ def _setup_writer(self): transform=tr_tel_list_to_mask, ) - # avoid some warnings about unwritable columns (which here are just sub-containers) + # avoid some warnings about unwritable columns (which here are just + # sub-containers) writer.exclude("dl1/event/subarray/trigger", "tel") writer.exclude("dl1/monitoring/subarray/pointing", "tel") - writer.exclude(f"/dl1/event/telescope/images/.*", "parameters") + writer.exclude("/dl1/event/telescope/images/.*", "parameters") # currently the trigger info is used for the event time, but we dont' # want the other bits of the trigger container in the pointing or other @@ -354,39 +355,37 @@ def _setup_writer(self): writer.exclude("/dl1/monitoring/event/pointing/.*", "event_type") if self.write_parameters is False: - writer.exclude(r"/dl1/event/telescope/images/.*", "image_mask") + writer.exclude("/dl1/event/telescope/images/.*", "image_mask") if self._is_simulation: - writer.exclude(r"/simulation/event/telescope/images/.*", "true_parameters") + writer.exclude("/simulation/event/telescope/images/.*", "true_parameters") # no timing information yet for true images - writer.exclude( - f"/simulation/event/telescope/parameters/.*", r"peak_time_.*" - ) - writer.exclude(r"/simulation/event/telescope/parameters/.*", r"timing_.*") + writer.exclude("/simulation/event/telescope/parameters/.*", r"peak_time_.*") + writer.exclude("/simulation/event/telescope/parameters/.*", "timing_.*") writer.exclude("/simulation/event/subarray/shower", "true_tel") # Set up transforms if self.transform_image: - tr = FixedPointColumnTransform( + transform = FixedPointColumnTransform( scale=self.image_scale, offset=self.image_offset, source_dtype=np.float32, target_dtype=np.dtype(self.image_dtype), ) writer.add_column_transform_regexp( - f"dl1/event/telescope/images/.*", "image", tr + "dl1/event/telescope/images/.*", "image", transform ) if self.transform_peak_time: - tr = 
FixedPointColumnTransform( + transform = FixedPointColumnTransform( scale=self.peak_time_scale, offset=self.peak_time_offset, source_dtype=np.float32, target_dtype=np.dtype(self.peak_time_dtype), ) writer.add_column_transform_regexp( - f"dl1/event/telescope/images/.*", "peak_time", tr + "dl1/event/telescope/images/.*", "peak_time", transform ) # set up DL2 transforms: @@ -394,7 +393,7 @@ def _setup_writer(self): # - the stereo output tel_ids list needs to be transformed to a pattern writer.exclude("dl2/event/telescope/.*", "tel_ids") writer.add_column_transform_regexp( - table_regexp=f"dl2/event/subarray/.*", + table_regexp="dl2/event/subarray/.*", col_regexp="tel_ids", transform=tr_tel_list_to_mask, ) @@ -423,6 +422,8 @@ def _write_simulation_configuration(self): self.log.debug("Writing simulation configuration") class ExtraSimInfo(Container): + """just to contain obs_id""" + container_prefix = "" obs_id = Field(0, "Simulation Run Identifier") @@ -442,19 +443,15 @@ class ExtraSimInfo(Container): def write_simulation_histograms(self, event_source): """Write the distribution of thrown showers - TODO: this needs to be fixed, since it currently requires access to the - low-level _file attribute of the SimTelEventSource. Instead, SimTelEventSource should - provide this as header info, like ``source.simulation_config`` - Notes ----- - - this only runs if this is a simulation file. The current implementation is - a bit of a hack and implies we should improve SimTelEventSource to read this - info. - - Currently the histograms are at the end of the simtel file, so if max_events - is set to non-zero, the end of the file may not be read, and this no - histograms will be found. + - this only runs if this is a simulation file. The current + implementation is a bit of a hack and implies we should improve + SimTelEventSource to read this info. + - Currently the histograms are at the end of the simtel file, so if + max_events is set to non-zero, the end of the file may not be read, + and this no histograms will be found. """ if not self._is_simulation: self.log.debug("Not writing simulation histograms for observed data") @@ -504,6 +501,7 @@ def fill_from_simtel( ) def table_name(self, tel_id, tel_type): + """construct dataset table names depending on chosen split method""" return f"tel_{tel_id:03d}" if self.split_datasets_by == "tel_id" else tel_type def _write_dl1_telescope_events( @@ -602,17 +600,18 @@ def _write_dl2_telescope_events( for container_name, algorithm_map in dl2_tel.items(): for algorithm, container in algorithm_map.items(): - writer.write( - table_name=f"dl2/event/telescope/{container_name}/{algorithm}/{table_name}", - containers=[tel_index, container], + name = ( + f"dl2/event/telescope/{container_name}/{algorithm}/{table_name}" ) + writer.write(table_name=name, containers=[tel_index, container]) + def _write_dl2_stereo_event(self, writer: TableWriter, event: ArrayEventContainer): """ write per-telescope DL2 shower information to e.g. 
`/dl2/event/stereo/{geometry,energy,classification}/` """ - + # pylint: disable=no-self-use for container_name, algorithm_map in event.dl2.stereo.items(): for algorithm, container in algorithm_map.items(): # note this will only write info if the particular algorithm @@ -627,7 +626,7 @@ def _generate_table_indices(self, h5file, start_node): """ helper to generate PyTables index tabnles for common columns """ for node in h5file.iter_nodes(start_node): if not isinstance(node, tables.group.Group): - self.log.debug(f"gen indices for: {node}") + self.log.debug("generating indices for node: %s", node) if "event_id" in node.colnames: node.cols.event_id.create_index() self.log.debug("generated event_id index") @@ -646,19 +645,19 @@ def _generate_indices(self): self.log.debug("Writing index tables") if self.write_images: self._generate_table_indices( - self._writer._h5file, "/dl1/event/telescope/images" + self._writer.h5file, "/dl1/event/telescope/images" ) if self._is_simulation: self._generate_table_indices( - self._writer._h5file, "/simulation/event/telescope/images" + self._writer.h5file, "/simulation/event/telescope/images" ) if self.write_parameters: self._generate_table_indices( - self._writer._h5file, "/dl1/event/telescope/parameters" + self._writer.h5file, "/dl1/event/telescope/parameters" ) if self._is_simulation: self._generate_table_indices( - self._writer._h5file, "/simulation/event/telescope/parameters" + self._writer.h5file, "/simulation/event/telescope/parameters" ) - self._generate_table_indices(self._writer._h5file, "/dl1/event/subarray") + self._generate_table_indices(self._writer.h5file, "/dl1/event/subarray") diff --git a/ctapipe/io/hdf5tableio.py b/ctapipe/io/hdf5tableio.py index 7873641352f..e5213aea876 100644 --- a/ctapipe/io/hdf5tableio.py +++ b/ctapipe/io/hdf5tableio.py @@ -127,14 +127,14 @@ def __init__( self._group = "/" + group_name self.filters = filters - self.log.debug("h5file: %s", self._h5file) + self.log.debug("h5file: %s", self.h5file) def open(self, filename, **kwargs): self.log.debug("kwargs for tables.open_file: %s", kwargs) - self._h5file = tables.open_file(filename, **kwargs) + self.h5file = tables.open_file(filename, **kwargs) def close(self): - self._h5file.close() + self.h5file.close() def _create_hdf5_table_schema(self, table_name, containers): """ @@ -271,8 +271,8 @@ def _setup_new_table(self, table_name, containers): for container in containers: meta.update(container.meta) # copy metadata from container - if table_path not in self._h5file: - table = self._h5file.create_table( + if table_path not in self.h5file: + table = self.h5file.create_table( where=table_group, name=table_basename, title="Storage of {}".format( @@ -286,7 +286,7 @@ def _setup_new_table(self, table_name, containers): for key, val in meta.items(): table.attrs[key] = val else: - table = self._h5file.get_node(table_path) + table = self.h5file.get_node(table_path) self._tables[table_name] = table diff --git a/ctapipe/io/metadata.py b/ctapipe/io/metadata.py index 67e6d258146..5abfaf14cba 100644 --- a/ctapipe/io/metadata.py +++ b/ctapipe/io/metadata.py @@ -26,10 +26,11 @@ import warnings from collections import OrderedDict -from traitlets import Enum, Unicode, Int, HasTraits, default, Instance, List +from tables import NaturalNameWarning +from traitlets import Enum, HasTraits, Instance, Int, List, Unicode, default +from astropy.time import Time -from ctapipe.core.provenance import _ActivityProvenance -from ctapipe.core.traits import AstroTime +from ..core.traits import AstroTime __all__ = [ 
"Reference", @@ -41,10 +42,8 @@ "write_to_hdf5", ] -from astropy.time import Time - -CONVERSIONS = {Time: lambda t: t.utc.iso, list: lambda l: str(l)} +CONVERSIONS = {Time: lambda t: t.utc.iso, list: str} class Contact(HasTraits): @@ -183,9 +182,9 @@ def _to_dict(hastraits_instance, prefix=""): """ res = {} - for k, tr in hastraits_instance.traits().items(): + for k, trait in hastraits_instance.traits().items(): key = (prefix + k.upper().replace("_", " ")).replace(" ", " ").strip() - val = tr.get(hastraits_instance) + val = trait.get(hastraits_instance) # apply type conversions val = CONVERSIONS.get(type(val), lambda v: v)(val) @@ -232,8 +231,6 @@ def write_to_hdf5(metadata, h5file): h5file: tables.file.File pytables filehandle """ - from tables import NaturalNameWarning - with warnings.catch_warnings(): warnings.simplefilter("ignore", NaturalNameWarning) for key, value in metadata.items(): diff --git a/ctapipe/io/tests/test_datawriter.py b/ctapipe/io/tests/test_datawriter.py index cf79de00366..a9994d10c3e 100644 --- a/ctapipe/io/tests/test_datawriter.py +++ b/ctapipe/io/tests/test_datawriter.py @@ -1,16 +1,17 @@ #!/usr/bin/env python3 -import numpy as np -from ctapipe.io.datawriter import DataWriter, DATA_MODEL_VERSION -from ctapipe.utils import get_dataset_path -from ctapipe.io import EventSource, DataLevel -from ctapipe.calib import CameraCalibrator -from pathlib import Path -from ctapipe.instrument import SubarrayDescription +import logging from copy import deepcopy +from pathlib import Path + +import numpy as np import tables -import logging from astropy import units as u +from ctapipe.calib import CameraCalibrator +from ctapipe.instrument import SubarrayDescription +from ctapipe.io import DataLevel, EventSource +from ctapipe.io.datawriter import DATA_MODEL_VERSION, DataWriter +from ctapipe.utils import get_dataset_path def generate_dummy_dl2(event): diff --git a/ctapipe/io/tests/test_hdf5.py b/ctapipe/io/tests/test_hdf5.py index 054091fd171..48f2c1f234a 100644 --- a/ctapipe/io/tests/test_hdf5.py +++ b/ctapipe/io/tests/test_hdf5.py @@ -383,9 +383,9 @@ def test_writer_closes_file(temp_h5_file): with tempfile.NamedTemporaryFile() as f: with HDF5TableWriter(f.name, "test") as h5_table: - assert h5_table._h5file.isopen == 1 + assert h5_table.h5file.isopen == 1 - assert h5_table._h5file.isopen == 0 + assert h5_table.h5file.isopen == 0 def test_reader_closes_file(temp_h5_file): @@ -425,7 +425,7 @@ def test_closing_writer(temp_h5_file): h5_table = HDF5TableWriter(f.name, "test") h5_table.close() - assert h5_table._h5file.isopen == 0 + assert h5_table.h5file.isopen == 0 def test_cannot_read_with_writer(temp_h5_file): @@ -728,7 +728,7 @@ class TestContainer(Container): with HDF5TableWriter( f.name, group_name="data", mode="w", filters=no_comp ) as writer: - assert writer._h5file.filters.complevel == 0 + assert writer.h5file.filters.complevel == 0 c = TestContainer(value=5) writer.write("default", c) diff --git a/ctapipe/reco/reco_algorithms.py b/ctapipe/reco/reco_algorithms.py index 57462fbb2c4..ca8e3a85d7b 100644 --- a/ctapipe/reco/reco_algorithms.py +++ b/ctapipe/reco/reco_algorithms.py @@ -35,7 +35,7 @@ def predict(self, tels_dict): Returns ------- - `~ctapipe.containers.ReconstructedShowerContainer` + `~ctapipe.containers.ReconstructedGeometryContainer` """ return ReconstructedGeometryContainer() diff --git a/ctapipe/tools/dl1_merge.py b/ctapipe/tools/dl1_merge.py index 081a662b099..6b5934f6833 100644 --- a/ctapipe/tools/dl1_merge.py +++ b/ctapipe/tools/dl1_merge.py @@ -430,7 +430,7 @@ def 
finish(self): with HDF5TableWriter( self.output_path, parent=self, mode="a", add_prefix=True ) as writer: - meta.write_to_hdf5(headers, writer._h5file) + meta.write_to_hdf5(headers, writer.h5file) def main(): diff --git a/docs/data_models/dl1.rst b/docs/data_models/dl1.rst index 4a18c7a4ef5..987af93b9d3 100644 --- a/docs/data_models/dl1.rst +++ b/docs/data_models/dl1.rst @@ -51,9 +51,9 @@ DL2 Data Model -------------- This describes data that change per-event. The following datasets will be -written to the group `/dl2/event/stereo//` and or -`/dl2/event/mono//`, one for each reconstruction algorithm in the -output file, where `` is the identifier of the algorithm (e.g. +written to the group ``/dl2/event/stereo//`` and or +``/dl2/event/mono//``, one for each reconstruction algorithm in the +output file, where ```` is the identifier of the algorithm (e.g. "Hillas"): .. list-table:: From d45855dfd2ab2b9c861156886f458ad2445d9c54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20N=C3=B6the?= Date: Thu, 13 May 2021 12:38:18 +0200 Subject: [PATCH 26/33] Precompile table regex for column exclusion, fixes #1719 (#1720) --- ctapipe/io/tableio.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ctapipe/io/tableio.py b/ctapipe/io/tableio.py index 86ac456852d..f0b309fbeb9 100644 --- a/ctapipe/io/tableio.py +++ b/ctapipe/io/tableio.py @@ -56,11 +56,11 @@ def exclude(self, table_regexp, pattern): """ table_regexp = table_regexp.lstrip("/") - self._exclusions[table_regexp].append(re.compile(pattern)) + self._exclusions[re.compile(table_regexp)].append(re.compile(pattern)) def _is_column_excluded(self, table_name, col_name): for table_regexp, col_regexp_list in self._exclusions.items(): - if re.fullmatch(table_regexp, table_name): + if table_regexp.fullmatch(table_name): for col_regexp in col_regexp_list: if col_regexp.fullmatch(col_name): return True From 86043d0ee19538d112151b0777d283368554b5e4 Mon Sep 17 00:00:00 2001 From: Karl Kosack Date: Thu, 13 May 2021 22:28:07 +0200 Subject: [PATCH 27/33] changed metadata process id to str for merged runs --- ctapipe/io/datawriter.py | 2 +- ctapipe/io/metadata.py | 2 +- ctapipe/io/tests/test_metadata.py | 2 +- ctapipe/tools/dl1_merge.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ctapipe/io/datawriter.py b/ctapipe/io/datawriter.py index 088de21be69..d3e8d42a362 100644 --- a/ctapipe/io/datawriter.py +++ b/ctapipe/io/datawriter.py @@ -79,7 +79,7 @@ def write_reference_metadata_headers( process=meta.Process( type_="Simulation" if is_simulation else "Observation", subtype="", - id_=",".join(obs_ids), + id_=",".join(str(x) for x in obs_ids), ), activity=meta.Activity.from_provenance(activity), instrument=meta.Instrument( diff --git a/ctapipe/io/metadata.py b/ctapipe/io/metadata.py index 5abfaf14cba..68885bd957d 100644 --- a/ctapipe/io/metadata.py +++ b/ctapipe/io/metadata.py @@ -103,7 +103,7 @@ class Process(HasTraits): type_ = Enum(["Observation", "Simulation", "Other"], "Other") subtype = Unicode("") - id_ = Int() + id_ = Unicode("") class Activity(HasTraits): diff --git a/ctapipe/io/tests/test_metadata.py b/ctapipe/io/tests/test_metadata.py index 6c9eb98ad9b..8ee2919918f 100644 --- a/ctapipe/io/tests/test_metadata.py +++ b/ctapipe/io/tests/test_metadata.py @@ -29,7 +29,7 @@ def test_construct_and_write_metadata(tmp_path): data_model_url="http://google.com", format="hdf5", ), - process=meta.Process(type_="Simulation", subtype="Prod3b", id_=423442), + process=meta.Process(type_="Simulation", subtype="Prod3b", 
id_="423442"), activity=meta.Activity.from_provenance(prov_activity.provenance), instrument=meta.Instrument( site="CTA-North", diff --git a/ctapipe/tools/dl1_merge.py b/ctapipe/tools/dl1_merge.py index 6b5934f6833..9f8c8a36dbf 100644 --- a/ctapipe/tools/dl1_merge.py +++ b/ctapipe/tools/dl1_merge.py @@ -414,7 +414,7 @@ def finish(self): data_model_url="", format="hdf5", ), - process=meta.Process(type_=process_type_, subtype="", id_=0), + process=meta.Process(type_=process_type_, subtype="", id_="merge"), activity=meta.Activity.from_provenance(activity), instrument=meta.Instrument( site="Other", From 4b09f798f64058d26490355772c6edbfb8f76544 Mon Sep 17 00:00:00 2001 From: Karl Kosack Date: Fri, 14 May 2021 00:32:00 +0200 Subject: [PATCH 28/33] fix notebook --- docs/examples/table_writer_reader.ipynb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/examples/table_writer_reader.ipynb b/docs/examples/table_writer_reader.ipynb index 3a42c998ffe..aa279843bbb 100644 --- a/docs/examples/table_writer_reader.ipynb +++ b/docs/examples/table_writer_reader.ipynb @@ -261,7 +261,7 @@ " \n", " h5_table.write('table', data)\n", "\n", - " print(h5_table._h5file)" + " print(h5_table.h5file)" ] }, { @@ -294,7 +294,7 @@ " \n", " h5_table.write('table', data)\n", "\n", - " print(h5_table._h5file)" + " print(h5_table.h5file)" ] }, { @@ -331,7 +331,7 @@ "metadata": {}, "outputs": [], "source": [ - "print(bool(h5_table._h5file.isopen))" + "print(bool(h5_table.h5file.isopen))" ] }, { @@ -512,7 +512,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.8" + "version": "3.8.8" } }, "nbformat": 4, From b5761265585278e6c4e20069ae505554d1ada788 Mon Sep 17 00:00:00 2001 From: Karl Kosack Date: Fri, 14 May 2021 10:42:05 +0200 Subject: [PATCH 29/33] optimize imports --- ctapipe/io/metadata.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ctapipe/io/metadata.py b/ctapipe/io/metadata.py index 68885bd957d..c9e390ef3de 100644 --- a/ctapipe/io/metadata.py +++ b/ctapipe/io/metadata.py @@ -26,9 +26,9 @@ import warnings from collections import OrderedDict -from tables import NaturalNameWarning -from traitlets import Enum, HasTraits, Instance, Int, List, Unicode, default from astropy.time import Time +from tables import NaturalNameWarning +from traitlets import Enum, HasTraits, Instance, List, Unicode, default from ..core.traits import AstroTime From ab2fcbded4ac74cbccb5bfce4898af40f6eda45a Mon Sep 17 00:00:00 2001 From: Karl Kosack Date: Fri, 14 May 2021 14:22:27 +0200 Subject: [PATCH 30/33] use ENUM for DataLevel in metadata --- ctapipe/io/metadata.py | 21 ++------------------- ctapipe/io/tests/test_metadata.py | 2 +- 2 files changed, 3 insertions(+), 20 deletions(-) diff --git a/ctapipe/io/metadata.py b/ctapipe/io/metadata.py index c9e390ef3de..eea98801791 100644 --- a/ctapipe/io/metadata.py +++ b/ctapipe/io/metadata.py @@ -31,6 +31,7 @@ from traitlets import Enum, HasTraits, Instance, List, Unicode, default from ..core.traits import AstroTime +from .datalevels import DataLevel __all__ = [ "Reference", @@ -61,25 +62,7 @@ class Product(HasTraits): creation_time = AstroTime() id_ = Unicode(help="leave unspecified to automatically generate a UUID") data_category = Enum(["Sim", "A", "B", "C", "Other"], "Other") - data_level = List( - Enum( - [ - "R0", - "R1", - "DL0", - "DL1", - "DL1_IMAGES", - "DL1_PARAMETERS", - "DL2", - "DL3", - "DL4", - "DL5", - "DL6", - "Other", - ], - "Other", - ) - ) + data_level = List(Enum([level.name for 
level in DataLevel])) data_association = Enum(["Subarray", "Telescope", "Target", "Other"], "Other") data_model_name = Unicode("unknown") data_model_version = Unicode("unknown") diff --git a/ctapipe/io/tests/test_metadata.py b/ctapipe/io/tests/test_metadata.py index 8ee2919918f..1cdd4447bf9 100644 --- a/ctapipe/io/tests/test_metadata.py +++ b/ctapipe/io/tests/test_metadata.py @@ -22,7 +22,7 @@ def test_construct_and_write_metadata(tmp_path): description="An Amazing Product", creation_time="2020-10-11 15:23:31", data_category="Sim", - data_level=["DL1"], + data_level=["DL1_IMAGES", "DL1_PARAMETERS"], data_association="Subarray", data_model_name="Unofficial DL1", data_model_version="1.0", From 6a8798ed5467fb7ad7d00b2b7e70bfc3ad8b598c Mon Sep 17 00:00:00 2001 From: Karl Kosack Date: Fri, 14 May 2021 14:22:39 +0200 Subject: [PATCH 31/33] fix incorrect help strings --- ctapipe/io/datawriter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ctapipe/io/datawriter.py b/ctapipe/io/datawriter.py index d3e8d42a362..f2790baa5c0 100644 --- a/ctapipe/io/datawriter.py +++ b/ctapipe/io/datawriter.py @@ -127,11 +127,11 @@ class DataWriter(Component): ).tag(config=True) write_stereo_shower = Bool( - help="Store DL2 stereo shower geometry if available", default_value=False + help="Store DL2 stereo shower parameters if available", default_value=False ).tag(config=True) write_mono_shower = Bool( - help="Store DL2 stereo mono geometry if available", default_value=False + help="Store DL2 mono parameters if available", default_value=False ).tag(config=True) compression_level = Int( From 03a68b45c99c95455477c907f13075044b6fd5cf Mon Sep 17 00:00:00 2001 From: Karl Kosack Date: Fri, 14 May 2021 14:34:45 +0200 Subject: [PATCH 32/33] remove incorrect R2 data level, add DL4-DL6 plus comments --- ctapipe/io/datalevels.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/ctapipe/io/datalevels.py b/ctapipe/io/datalevels.py index 5fee518f5c8..10d902579eb 100644 --- a/ctapipe/io/datalevels.py +++ b/ctapipe/io/datalevels.py @@ -4,11 +4,16 @@ class DataLevel(Enum): """Enum of the different Data Levels""" - R0 = auto() - R1 = auto() - R2 = auto() - DL0 = auto() - DL1_IMAGES = auto() - DL1_PARAMETERS = auto() - DL2 = auto() - DL3 = auto() + R0 = auto() # Raw data in camera or simulation format + R1 = auto() # Raw data in common format, with preliminary calibration + DL0 = auto() # raw archived data in common format, with optional zero suppression + DL1_IMAGES = auto() # processed data up to camera images + DL1_PARAMETERS = auto() # parameters derived from camera images + DL2 = auto() # reconstructed data + DL3 = auto() # reduced reconstructed data + + # the rest are not generated by ctapipe, but are listed here in case this + # code is used elsewhere: + DL4 = auto() # binned datasets + DL5 = auto() # science datasets (fluxes) + DL6 = auto() # derived science data (catalogs, etc.) 
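A minimal usage sketch of the two data-level changes above (it is not part of any patch in this series and only restates definitions already visible in the diffs): PATCH 30 draws the allowed values of Product.data_level directly from the DataLevel enum, and PATCH 32 adds the commented DL4-DL6 members. The Product keyword values below are copied from ctapipe/io/tests/test_metadata.py; the import style and the print statements are assumed boilerplate, so treat the snippet as illustrative rather than shipped code.

    # Minimal sketch, assuming the post-PATCH-32 state of ctapipe.io shown above;
    # not part of the patch series itself.
    from ctapipe.io import DataLevel
    from ctapipe.io import metadata as meta

    # Product.data_level is now List(Enum([level.name for level in DataLevel])),
    # so every enum member name is a legal entry.
    print([level.name for level in DataLevel])
    # names in definition order: R0, R1, DL0, DL1_IMAGES, DL1_PARAMETERS,
    # DL2, DL3, DL4, DL5, DL6 (a plain DL1 member is added in the next patch)

    # Building a metadata Product with the new level names, mirroring
    # ctapipe/io/tests/test_metadata.py:
    product = meta.Product(
        description="An Amazing Product",
        creation_time="2020-10-11 15:23:31",
        data_category="Sim",
        data_level=["DL1_IMAGES", "DL1_PARAMETERS"],
        data_association="Subarray",
        data_model_name="Unofficial DL1",
        data_model_version="1.0",
    )
    print(product.data_level)
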
From 08f56edb97a3d33545a04357424dea69fa495cab Mon Sep 17 00:00:00 2001
From: Karl Kosack
Date: Sun, 16 May 2021 14:33:26 +0200
Subject: [PATCH 33/33] added generic DL1 data level

---
 ctapipe/io/datalevels.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ctapipe/io/datalevels.py b/ctapipe/io/datalevels.py
index 10d902579eb..aa31caa84f4 100644
--- a/ctapipe/io/datalevels.py
+++ b/ctapipe/io/datalevels.py
@@ -7,6 +7,7 @@ class DataLevel(Enum):
     R0 = auto()  # Raw data in camera or simulation format
     R1 = auto()  # Raw data in common format, with preliminary calibration
     DL0 = auto()  # raw archived data in common format, with optional zero suppression
+    DL1 = auto()  # processed data
     DL1_IMAGES = auto()  # processed data up to camera images
     DL1_PARAMETERS = auto()  # parameters derived from camera images
     DL2 = auto()  # reconstructed data