Allows DataStoreMaker to be used with IRFs not following CALDB structure #3846

Merged
merged 19 commits on Apr 7, 2022
40 changes: 27 additions & 13 deletions docs/tutorials/analysis/3D/event_sampling.ipynb
@@ -144,7 +144,7 @@
"metadata": {},
"outputs": [],
"source": [
"filename = \"$GAMMAPY_DATA/cta-caldb/Prod5-South-20deg-AverageAz-14MSTs37SSTs.180000s-v0.1.fits.gz\"\n",
"irf_filename = \"$GAMMAPY_DATA/cta-caldb/Prod5-South-20deg-AverageAz-14MSTs37SSTs.180000s-v0.1.fits.gz\"\n",
"\n",
"pointing = SkyCoord(0.0, 0.0, frame=\"galactic\", unit=\"deg\")\n",
"livetime = 1 * u.hr"
@@ -165,7 +165,7 @@
"metadata": {},
"outputs": [],
"source": [
"irfs = load_cta_irfs(filename)\n",
"irfs = load_cta_irfs(irf_filename)\n",
"location = observatory_locations['cta_south']\n",
"\n",
"observation = Observation.create(\n",
@@ -375,12 +375,11 @@
"metadata": {},
"outputs": [],
"source": [
"events.table.meta[\"OBJECT\"] = dataset.models[0].name"
"events.table.meta[\"OBJECT\"] = dataset.models[0].name\n"
]
},
{
"cell_type": "markdown",
"id": "874b911e",
"metadata": {},
"source": [
"Let's write the event list and its GTI extension to a FITS file. We make use of `fits` library in `astropy`:"
@@ -747,8 +746,11 @@
"outputs": [],
"source": [
"%%time\n",
"n_obs = len(tstarts)\n",
"irf_paths = [Path(irf_filename)] * n_obs\n",
Contributor:

Is it useful here? It is only when reading that you need the path to the irf now, no?

Contributor Author:

In general, observations can have different IRF files, so the list of IRF paths has to be defined before the loop. I wanted to stress that in the notebook, even if it is not necessary for this use case (we could, for example, use two different zenith angles).
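For instance, with observations taken at two different zenith angles, the list could be built before the loop like this (a sketch; the 40 deg file name is hypothetical):

```python
from pathlib import Path

# One IRF file per observation: the first two observations share the
# 20 deg zenith-angle IRFs, the third uses a (hypothetical) 40 deg set.
irf_paths = [
    Path("$GAMMAPY_DATA/cta-caldb/Prod5-South-20deg-AverageAz-14MSTs37SSTs.180000s-v0.1.fits.gz"),
    Path("$GAMMAPY_DATA/cta-caldb/Prod5-South-20deg-AverageAz-14MSTs37SSTs.180000s-v0.1.fits.gz"),
    Path("$GAMMAPY_DATA/cta-caldb/Prod5-South-40deg-AverageAz-14MSTs37SSTs.180000s-v0.1.fits.gz"),
]
```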

"events_paths = []\n",
"for idx, tstart in enumerate(tstarts):\n",
"\n",
" irfs = load_cta_irfs(irf_paths[idx])\n",
" observation = Observation.create(\n",
" obs_id=idx,\n",
" pointing=pointing,\n",
@@ -760,20 +762,20 @@
"\n",
" dataset = maker.run(empty, observation)\n",
" dataset.models = models\n",
"\n",
" sampler = MapDatasetEventSampler(random_state=idx)\n",
" events = sampler.run(dataset, observation)\n",
" events.table.write(\n",
" f\"./event_sampling/events_{idx:04d}.fits\", overwrite=True\n",
" )"
" \n",
" path = Path(f\"./event_sampling/events_{idx:04d}.fits\")\n",
" events_paths.append(path)\n",
" events.table.write(path, overwrite=True)"
]
},
{
"cell_type": "markdown",
"id": "626a8673",
"metadata": {},
"source": [
"You can now load the event list with `Datastore.from_events_files()` and make your own analysis following the instructions in the [`analysis_2`](analysis_2.ipynb) tutorial."
"You can now load the event list and the corresponding IRFs with `Datastore.from_events_files()` : "
]
},
{
@@ -784,8 +786,8 @@
"outputs": [],
"source": [
"path = Path(\"./event_sampling/\")\n",
"paths = list(path.rglob(\"events*.fits\"))\n",
"data_store = DataStore.from_events_files(paths)\n",
"events_paths = list(path.rglob(\"events*.fits\"))\n",
"data_store = DataStore.from_events_files(events_paths, irf_paths)\n",
Contributor:

This is probably where you could define the list of irf_paths, no?

"data_store.obs_table"
]
},
@@ -794,7 +796,17 @@
"id": "bbf88446",
"metadata": {},
"source": [
"For completeness, `data_store` is a `~gammapy.data.Datastore` object. You can find more information about it [here](https://docs.gammapy.org/dev/tutorials/data/cta.html#Datastore)."
"Then you can create the obervations from the Datastore and make your own analysis following the instructions in the [`analysis_2`](analysis_2.ipynb) tutorial."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"observations = data_store.get_observations()\n",
"observations[0].peek()"
]
},
{
@@ -812,6 +824,8 @@
"id": "38a38c6a",
"metadata": {},
"source": [
"For completeness, `data_store` is a `~gammapy.data.Datastore` object. You can find more information about it [here](https://docs.gammapy.org/dev/tutorials/data/cta.html#Datastore).\n",
"\n",
"## Exercises\n",
"- Try to sample events for an extended source (e.g. a radial gaussian morphology);\n",
"- Change the spatial model and the spectrum of the simulated Sky model;\n",
92 changes: 60 additions & 32 deletions gammapy/data/data_store.py
@@ -153,7 +153,7 @@ def from_dir(cls, base_dir, hdu_table_filename=None, obs_table_filename=None):
return cls(hdu_table=hdu_table, obs_table=obs_table)

@classmethod
def from_events_files(cls, paths):
def from_events_files(cls, events_paths, irfs_paths=None):
"""Create from a list of event filenames.

HDU and observation index tables will be created from the EVENTS header.
@@ -170,16 +170,28 @@ def from_events_files(cls, paths):

.. _ctobssim: http://cta.irap.omp.eu/ctools/users/reference_manual/ctobssim.html

Parameters
----------
events_paths : list of str or Path
List of paths to the events files.
irfs_paths : str, Path, or list of str or Path, optional
Path to the IRF files. If a list is provided, it must have the same length as `events_paths`.
If None, the events files have to contain CALDB and IRF header keywords to locate the IRF files;
otherwise the IRFs are assumed to be contained in the events files.

Returns
-------
data_store : `DataStore`
Data store

Examples
--------
This is how you can access a single event list::

>>> from gammapy.data import DataStore
>>> import os
>>> os.environ["CALDB"] = os.environ["GAMMAPY_DATA"] + "/cta-1dc/caldb"
>>> path = "$GAMMAPY_DATA/cta-1dc/data/baseline/gps/gps_baseline_110380.fits"
>>> data_store = DataStore.from_events_files([path])
>>> observations = data_store.get_observations()
@@ -204,7 +216,7 @@ def from_events_files(cls, paths):
>>> data_store.hdu_table.write("hdu-index.fits.gz") # doctest: +SKIP
>>> data_store.obs_table.write("obs-index.fits.gz") # doctest: +SKIP
"""
return DataStoreMaker(paths).run()
return DataStoreMaker(events_paths, irfs_paths).run()

def info(self, show=True):
"""Print some info."""
@@ -433,7 +445,6 @@ def check_consistency(self):
"level": "error",
"msg": "Inconsistent OBS_ID in obs and HDU index tables",
}

# TODO: obs table and events header should have the same times

def check_observations(self):
@@ -450,11 +461,15 @@ class DataStoreMaker:
Users will usually call this via `DataStore.from_events_files`.
"""

def __init__(self, paths):
if isinstance(paths, (str, Path)):
def __init__(self, events_paths, irfs_paths=None):
if isinstance(events_paths, (str, Path)):
raise TypeError("Need list of paths, not a single string or Path object.")

self.paths = [make_path(path) for path in paths]
self.events_paths = [make_path(path) for path in events_paths]
if irfs_paths is None or isinstance(irfs_paths, (str, Path)):
# Broadcast a single IRF path (or None) to every events file
self.irfs_paths = [make_path(irfs_paths)] * len(events_paths)
else:
self.irfs_paths = [make_path(path) for path in irfs_paths]

# Cache for EVENTS file header information, to avoid multiple reads
self._events_info = {}
@@ -464,22 +479,22 @@ def run(self):
obs_table = self.make_obs_table()
return DataStore(hdu_table=hdu_table, obs_table=obs_table)

def get_events_info(self, path):
if path not in self._events_info:
self._events_info[path] = self.read_events_info(path)
def get_events_info(self, events_path, irf_path=None):
if events_path not in self._events_info:
self._events_info[events_path] = self.read_events_info(events_path, irf_path)

return self._events_info[path]
return self._events_info[events_path]

def get_obs_info(self, path):
def get_obs_info(self, events_path, irf_path=None):
# We could add or remove info here depending on what we want in the obs table
return self.get_events_info(path)
return self.get_events_info(events_path, irf_path)

@staticmethod
def read_events_info(path):
def read_events_info(events_path, irf_path=None):
"""Read mandatory events header info"""
log.debug(f"Reading {path}")
log.debug(f"Reading {events_path}")

with fits.open(path, memmap=False) as hdu_list:
with fits.open(events_path, memmap=False) as hdu_list:
header = hdu_list["EVENTS"].header

na_int, na_str = -1, "NOT AVAILABLE"
@@ -515,19 +530,27 @@ def read_events_info(path):
info["N_TELS"] = header.get("N_TELS", na_int)
info["OBJECT"] = header.get("OBJECT", na_str)

# This is the info needed to link from EVENTS to IRFs
info["CALDB"] = header.get("CALDB", na_str)
info["IRF"] = header.get("IRF", na_str)

# Not part of the spec, but good to know from which file the info comes
info["EVENTS_FILENAME"] = str(path)
info["EVENTS_FILENAME"] = str(events_path)
info["EVENT_COUNT"] = header["NAXIS2"]

# This is the info needed to link from EVENTS to IRFs
info["CALDB"] = header.get("CALDB", na_str)
info["IRF"] = header.get("IRF", na_str)
if irf_path is not None:
# IRF file given explicitly by the user
info["IRF_FILENAME"] = str(irf_path)
elif info["CALDB"] != na_str and info["IRF"] != na_str:
# Locate the IRF file through the CALDB directory structure
caldb_irf = CalDBIRF.from_meta(info)
info["IRF_FILENAME"] = str(caldb_irf.file_path)
else:
# Fall back to the events file itself, assuming it also contains the IRFs
info["IRF_FILENAME"] = info["EVENTS_FILENAME"]
return info

def make_obs_table(self):
rows = []
for path in self.paths:
row = self.get_obs_info(path)
for events_path, irf_path in zip(self.events_paths, self.irfs_paths):
row = self.get_obs_info(events_path, irf_path)
rows.append(row)

names = list(rows[0].keys())
@@ -554,8 +577,8 @@

def make_hdu_table(self):
rows = []
for path in self.paths:
rows.extend(self.get_hdu_table_rows(path))
for events_path, irf_path in zip(self.events_paths, self.irfs_paths):
rows.extend(self.get_hdu_table_rows(events_path, irf_path))

names = list(rows[0].keys())
# names = ['OBS_ID', 'HDU_TYPE', 'HDU_CLASS', 'FILE_DIR', 'FILE_NAME', 'HDU_NAME']
@@ -571,22 +594,22 @@ def make_hdu_table(self):

return table

def get_hdu_table_rows(self, path):
events_info = self.get_events_info(path)
def get_hdu_table_rows(self, events_path, irf_path=None):
events_info = self.get_obs_info(events_path, irf_path)

info = dict(
OBS_ID=events_info["OBS_ID"],
FILE_DIR=path.parent.as_posix(),
FILE_NAME=path.name,
FILE_DIR=events_path.parent.as_posix(),
FILE_NAME=events_path.name,
)
yield dict(HDU_TYPE="events", HDU_CLASS="events", HDU_NAME="EVENTS", **info)
yield dict(HDU_TYPE="gti", HDU_CLASS="gti", HDU_NAME="GTI", **info)

caldb_irf = CalDBIRF.from_meta(events_info)
irf_path = Path(events_info["IRF_FILENAME"])
info = dict(
OBS_ID=events_info["OBS_ID"],
FILE_DIR=caldb_irf.file_dir,
FILE_NAME=caldb_irf.file_name,
FILE_DIR=irf_path.parent.as_posix(),
FILE_NAME=irf_path.name,
)
yield dict(
HDU_TYPE="aeff", HDU_CLASS="aeff_2d", HDU_NAME="EFFECTIVE AREA", **info
@@ -622,6 +645,11 @@ def file_dir(self):
telescop = self.telescop.lower()
return f"$CALDB/data/{telescop}/{self.caldb}/bcf/{self.irf}"

@property
def file_path(self):
return Path(f"{self.file_dir}/{self.file_name}")

@property
def file_name(self):
return "irf_file.fits"
path = make_path(self.file_dir)
return list(path.iterdir())[0].name
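For reference, with the 1DC headers used in the docstring example above (TELESCOP="CTA", CALDB="1dc", IRF="South_z20_50h"), the path construction resolves as in this standalone sketch (the final file name depends on the directory contents):

```python
# Hypothetical values mirroring the CalDBIRF properties above
telescop, caldb, irf = "CTA", "1dc", "South_z20_50h"
file_dir = f"$CALDB/data/{telescop.lower()}/{caldb}/bcf/{irf}"
print(file_dir)  # -> $CALDB/data/cta/1dc/bcf/South_z20_50h
# file_name is now the first file found in that directory, rather than
# the hard-coded "irf_file.fits" returned before this change.
```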