Refactor FluxPointEstimator to use Datasets #2112

Merged
merged 7 commits into from Apr 12, 2019

Changes from 4 commits
5 changes: 3 additions & 2 deletions gammapy/scripts/spectrum_pipe.py
@@ -88,13 +88,14 @@ def run_fit(self, optimize_opts=None):

         # TODO: Don't stack again if SpectrumFit has already done the stacking
         stacked_obs = self.extraction.spectrum_observations.stack()
-        self.egm = SpectrumEnergyGroupMaker(stacked_obs)
+        self.egm = SpectrumEnergyGroupMaker(stacked_obs.e_reco)
         self.egm.compute_groups_fixed(self.config["fp_binning"])

+        datasets = [obs.to_spectrum_dataset() for obs in self.extraction.spectrum_observations]
         self.flux_point_estimator = FluxPointEstimator(
             groups=self.egm.groups,
             model=self.fit.result[0].model,
-            obs=self.extraction.spectrum_observations,
+            datasets=datasets,
         )
         self.flux_points = self.flux_point_estimator.run()
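Taken together, the new calling convention looks roughly like this. A hedged sketch against the gammapy API of this era (~0.11): `observations` (a SpectrumObservationList) and `model` (a fitted spectral model) are assumed to exist, and the binning values are illustrative only.

    import astropy.units as u
    from gammapy.spectrum import SpectrumEnergyGroupMaker, FluxPointEstimator

    # Convert each observation to a dataset up front.
    datasets = [obs.to_spectrum_dataset() for obs in observations]

    # The group maker now only needs the reconstructed energy binning.
    egm = SpectrumEnergyGroupMaker(e_reco=observations.stack().e_reco)
    egm.compute_groups_fixed([1, 3, 10, 30] * u.TeV)  # illustrative binning

    fpe = FluxPointEstimator(datasets=datasets, groups=egm.groups, model=model)
    flux_points = fpe.run()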
20 changes: 1 addition & 19 deletions gammapy/spectrum/dataset.py
@@ -265,22 +265,4 @@ def read(cls, filename):
             OGIP PHA file to read
         """
         observation = SpectrumObservation.read(filename)
-        return SpectrumDatasetOnOff._from_spectrum_observation(observation)
-
-    # TODO: check if SpectrumObservation is needed in the long run
-    @classmethod
-    def _from_spectrum_observation(cls, observation):
-        """Creates a SpectrumDatasetOnOff from a SpectrumObservation object"""
-
-        # Build mask from quality vector
-        quality = observation.on_vector.quality
-        mask = quality == 0
-
-        return cls(
-            counts_on=observation.on_vector,
-            aeff=observation.aeff,
-            counts_off=observation.off_vector,
-            edisp=observation.edisp,
-            livetime=observation.livetime,
-            mask=mask,
-        )
+        return observation.to_spectrum_dataset()
13 changes: 7 additions & 6 deletions gammapy/spectrum/energy_group.py
@@ -21,6 +21,7 @@
 from astropy.table import Table
 from astropy.table import vstack as table_vstack
 from ..utils.table import table_from_row_data, table_row_to_dict
+from ..utils.energy import EnergyBounds

 __all__ = ["SpectrumEnergyGroup", "SpectrumEnergyGroups", "SpectrumEnergyGroupMaker"]

@@ -219,8 +220,8 @@ class SpectrumEnergyGroupMaker:

     Attributes
     ----------
-    obs : `~gammapy.spectrum.SpectrumObservation`
-        Spectrum observation data
+    e_reco : `~astropy.units.Quantity`
+        Array of reconstructed energies.
     groups : `~gammapy.spectrum.SpectrumEnergyGroups`
         List of energy groups

@@ -229,13 +230,13 @@ class SpectrumEnergyGroupMaker:
     SpectrumEnergyGroups, SpectrumEnergyGroup, FluxPointEstimator
     """

-    def __init__(self, obs):
-        self.obs = obs
+    def __init__(self, e_reco):
+        self.e_reco = EnergyBounds(e_reco)
         self.groups = None

     def groups_from_obs(self):
Contributor: so this method can be removed I think

Contributor: or renamed? not sure what it does.
"""Compute energy groups with one group per energy bin."""
ebounds_obs = self.obs.e_reco
ebounds_obs = self.e_reco
size = ebounds_obs.nbins
table = Table()
table["bin_idx"] = np.arange(size)
Expand All @@ -256,7 +257,7 @@ def compute_groups_fixed(self, ebounds):
ebounds : `~astropy.units.Quantity`
Energy bounds array
"""
ebounds_src = self.obs.e_reco.to(ebounds.unit)
ebounds_src = self.e_reco.to(ebounds.unit)
bin_edges_src = np.arange(len(ebounds_src))

temp = np.interp(ebounds, ebounds_src, bin_edges_src)
Expand Down
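Since the group maker now depends only on an energy axis, it can be exercised standalone, without any SpectrumObservation. A minimal sketch (the binning values are illustrative, not taken from this PR):

    import numpy as np
    import astropy.units as u
    from gammapy.spectrum import SpectrumEnergyGroupMaker

    # Any reconstructed energy binning works; no observation is needed.
    e_reco = np.logspace(-1, 2, 73) * u.TeV  # 72 log-spaced bins, illustrative

    seg = SpectrumEnergyGroupMaker(e_reco=e_reco)
    seg.compute_groups_fixed(ebounds=[1, 3, 10, 30] * u.TeV)
    print(seg.groups)

As for the question in the comments above: groups_from_obs() simply builds one group per reconstructed energy bin, so a rename along the lines of groups_from_bins (a hypothetical name) would describe it better now that no observation is involved.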
58 changes: 22 additions & 36 deletions gammapy/spectrum/flux_point.py
@@ -8,10 +8,9 @@
 from ..utils.scripts import make_path
 from ..utils.table import table_standardise_units_copy, table_from_row_data
 from ..utils.interpolation import ScaledRegularGridInterpolator
-from ..utils.fitting import Dataset
+from ..utils.fitting import Dataset, Datasets
 from .models import PowerLaw, ScaleModel
 from .powerlaw import power_law_integral_flux
-from .observation import SpectrumObservationList, SpectrumObservation

 __all__ = ["FluxPoints", "FluxPointEstimator", "FluxPointsDataset"]

@@ -783,7 +782,7 @@ class FluxPointEstimator:

     def __init__(
         self,
-        obs,
+        datasets,
         groups,
         model,
         norm_min=0.2,

@@ -793,10 +792,13 @@ def __init__(
         sigma=1,
         sigma_ul=2,
     ):
-        if isinstance(obs, SpectrumObservation):
-            obs = SpectrumObservationList([obs])
-        self.obs = SpectrumObservationList(obs)
+        datasets = Datasets(datasets).copy()
+        if not (datasets.is_all_same_type and datasets.is_all_same_shape):
+            raise ValueError("Flux point estimation only supported for a"
+                             " homogeneous set of datasets.")
+
+        # make a copy to not modify the input datasets
+        self.datasets = datasets
         self.groups = groups
         self.model = ScaleModel(model)
         self.model.parameters["norm"].min = 0

@@ -808,13 +810,17 @@ def __init__(
         self.sigma = sigma
         self.sigma_ul = sigma_ul

+        # set the model on all datasets
+        for dataset in self.datasets.datasets:
+            dataset.model = self.model
+
     @property
     def ref_model(self):
         return self.model.model

     def __str__(self):
         s = "{}:\n".format(self.__class__.__name__)
-        s += str(self.obs) + "\n"
+        s += str(self.datasets) + "\n"
         s += str(self.groups) + "\n"
         s += str(self.model) + "\n"
         return s

@@ -843,6 +849,11 @@ def run(self, steps="all"):
         table = table_from_row_data(rows=rows, meta=meta)
         return FluxPoints(table).to_sed_type("dnde")

+    def _energy_mask(self, e_group):
+        energy_mask = np.zeros(self.datasets.datasets[0].data_shape)
+        energy_mask[e_group.bin_idx_min:e_group.bin_idx_max + 1] = 1
+        return energy_mask.astype(bool)
+
     def estimate_flux_point(self, e_group, steps="all"):
         """Estimate flux point for a single energy group.

@@ -882,7 +893,7 @@ def estimate_flux_point(self, e_group, steps="all"):
             ]
         )

-        quality_orig = self._set_quality(e_group)
+        self.datasets.mask = self._energy_mask(e_group)
         result.update(self.estimate_norm())

         if steps == "all":

@@ -903,7 +914,6 @@ def estimate_flux_point(self, e_group, steps="all"):
         if "norm-scan" in steps:
             result.update(self.estimate_norm_scan(result))

-        self._restore_quality(quality_orig)
         return result

     def estimate_norm_errn_errp(self):

@@ -951,7 +961,7 @@ def estimate_norm_ts(self):
         """
         parameters = self.model.parameters

-        loglike = self.fit.total_stat(parameters)
+        loglike = self.datasets.likelihood()
         norm_best_fit = parameters["norm"].value

         # store best fit amplitude, set amplitude of fit model to zero

@@ -985,9 +995,9 @@ def estimate_norm(self):
         result : dict
             Dict with "norm" and "loglike" for the flux point.
         """
-        from .fit import SpectrumFit
+        from ..utils.fitting import Fit

-        self.fit = SpectrumFit(self.obs, self.model)
+        self.fit = Fit(self.datasets)
         result = self.fit.optimize()

         if result.success:

@@ -1003,30 +1013,6 @@

         return {"norm": norm, "loglike": result.total_stat}

-    # TODO: clean up this helper function and maybe move it to SpectrumObservations
-    def _set_quality(self, energy_group):
-        quality_orig = []
-
-        for obs in self.obs:
-            # Set quality bins to only bins in energy_group
-            quality_orig_unit = obs.on_vector.quality
-            quality_len = len(quality_orig_unit)
-            quality_orig.append(quality_orig_unit)
-            quality = np.zeros(quality_len, dtype=int)
-            for bin in range(quality_len):
-                if (
-                    (bin < energy_group.bin_idx_min)
-                    or (bin > energy_group.bin_idx_max)
-                    or (energy_group.bin_type != "normal")
-                ):
-                    quality[bin] = 1
-            obs.on_vector.quality = quality
-
-        return quality_orig
-
-    def _restore_quality(self, quality_orig):
-        for obs, quality in zip(self.obs, quality_orig):
-            obs.on_vector.quality = quality


 class FluxPointsDataset(Dataset):
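The heart of the refactoring in this file: the old _set_quality/_restore_quality pair mutated each observation's quality vector and then had to restore it, whereas the new _energy_mask builds a boolean mask once per energy group and assigns it to Datasets.mask. The selection logic in isolation, as a self-contained NumPy sketch (bin_idx_min/bin_idx_max mirror the SpectrumEnergyGroup attributes):

    import numpy as np

    def energy_mask(data_shape, bin_idx_min, bin_idx_max):
        """Boolean mask selecting the bins of one energy group (sketch)."""
        mask = np.zeros(data_shape)
        mask[bin_idx_min:bin_idx_max + 1] = 1  # inclusive of bin_idx_max
        return mask.astype(bool)

    # For a 20-bin spectrum, select bins 5..9 inclusive:
    mask = energy_mask((20,), bin_idx_min=5, bin_idx_max=9)
    assert mask.sum() == 5

Because the mask lives on the Datasets container rather than inside the observations, there is nothing to restore afterwards, which is why _restore_quality could be deleted outright.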
18 changes: 18 additions & 0 deletions gammapy/spectrum/observation.py
@@ -518,6 +518,24 @@ def copy(self):
         """A deep copy."""
         return copy.deepcopy(self)

+    def to_spectrum_dataset(self):
Contributor: Indeed, it is probably much better to have this function on SpectrumObservation rather than on the Dataset itself.
"""Creates a SpectrumDatasetOnOff from a SpectrumObservation object"""
from .dataset import SpectrumDatasetOnOff

# Build mask from quality vector
quality = self.on_vector.quality
mask = quality == 0

return SpectrumDatasetOnOff(
counts_on=self.on_vector,
aeff=self.aeff,
counts_off=self.off_vector,
edisp=self.edisp,
livetime=self.livetime,
mask=mask,
)



class SpectrumObservationList(UserList):
"""List of `~gammapy.spectrum.SpectrumObservation` objects."""
Expand Down
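A short usage sketch for the new converter (the file name is a placeholder; quality == 0 marks good bins in the PHA convention, and those are exactly the bins the mask keeps):

    from gammapy.spectrum import SpectrumObservation

    obs = SpectrumObservation.read("obs_23523.pha")  # placeholder path
    dataset = obs.to_spectrum_dataset()

    # Bins flagged bad in the PHA quality column are excluded via the mask.
    print(dataset.mask.sum(), "good bins out of", dataset.mask.size)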
12 changes: 6 additions & 6 deletions gammapy/spectrum/tests/test_energy_group.py
@@ -117,7 +117,7 @@ def obs(self):

     @staticmethod
     def test_groups_from_obs(obs):
-        seg = SpectrumEnergyGroupMaker(obs=obs)
+        seg = SpectrumEnergyGroupMaker(e_reco=obs.e_reco)
         seg.groups_from_obs()
         groups = seg.groups

@@ -126,7 +126,7 @@ def test_groups_from_obs(obs):
     @staticmethod
     def test_compute_groups_fixed_basic(obs):
         ebounds = [1, 2, 10] * u.TeV
-        seg = SpectrumEnergyGroupMaker(obs=obs)
+        seg = SpectrumEnergyGroupMaker(e_reco=obs.e_reco)
         seg.compute_groups_fixed(ebounds=ebounds)
         groups = seg.groups

@@ -145,7 +145,7 @@ def test_compute_groups_fixed_basic(obs):
         [[1.8, 4.8, 7.2] * u.TeV, [2, 5, 7] * u.TeV, [2000, 5000, 7000] * u.GeV],
     )
     def test_compute_groups_fixed_edges(obs, ebounds):
-        seg = SpectrumEnergyGroupMaker(obs=obs)
+        seg = SpectrumEnergyGroupMaker(e_reco=obs.e_reco)
         seg.compute_groups_fixed(ebounds=ebounds)
         groups = seg.groups

@@ -163,7 +163,7 @@ def test_compute_groups_fixed_edges(obs, ebounds):
     @staticmethod
     def test_compute_groups_fixed_below_range(obs):
         ebounds = [0.7, 0.8, 1, 4] * u.TeV
-        seg = SpectrumEnergyGroupMaker(obs=obs)
+        seg = SpectrumEnergyGroupMaker(e_reco=obs.e_reco)
         seg.compute_groups_fixed(ebounds=ebounds)
         groups = seg.groups

@@ -179,7 +179,7 @@ def test_compute_groups_fixed_below_range(obs):
     @staticmethod
     def test_compute_groups_fixed_above_range(obs):
         ebounds = [5, 7, 11, 13] * u.TeV
-        seg = SpectrumEnergyGroupMaker(obs=obs)
+        seg = SpectrumEnergyGroupMaker(e_reco=obs.e_reco)
         seg.compute_groups_fixed(ebounds=ebounds)
         groups = seg.groups

@@ -196,6 +196,6 @@ def test_compute_groups_fixed_above_range(obs):
     @staticmethod
     def test_compute_groups_fixed_outside_range(obs):
         ebounds = [20, 30, 40] * u.TeV
-        seg = SpectrumEnergyGroupMaker(obs=obs)
+        seg = SpectrumEnergyGroupMaker(e_reco=obs.e_reco)
         with pytest.raises(ValueError):
             seg.compute_groups_fixed(ebounds=ebounds)
26 changes: 10 additions & 16 deletions gammapy/spectrum/tests/test_flux_point_estimator.py
@@ -9,10 +9,11 @@
 from ..models import PowerLaw, ExponentialCutoffPowerLaw
 from ..simulation import SpectrumSimulation
 from ..flux_point import FluxPointEstimator
+from ..dataset import SpectrumDatasetOnOff


 # TODO: use pregenerated data instead
-def simulate_obs(model):
+def simulate_dataset(model):
     energy = np.logspace(-0.5, 1.5, 21) * u.TeV
     aeff = EffectiveAreaTable.from_parametrization(energy=energy)
     bkg_model = PowerLaw(index=2.5, amplitude="1e-12 cm-2 s-1 TeV-1")

@@ -24,21 +25,23 @@ def simulate_obs(model):
         alpha=0.2,
     )
     sim.run(seed=[0])
-    return sim.result[0]
+    obs = sim.result[0]
+    return obs.to_spectrum_dataset()


-def define_energy_groups(obs):
+def define_energy_groups(dataset):
     # the energy bounds are chosen such that one flux point is estimated per decade
     ebounds = [0.1, 1, 10, 100] * u.TeV
-    segm = SpectrumEnergyGroupMaker(obs=obs)
+    e_reco = dataset.counts_on.energy.bins
+    segm = SpectrumEnergyGroupMaker(e_reco)
     segm.compute_groups_fixed(ebounds=ebounds)
     return segm.groups


 def create_fpe(model):
-    obs = simulate_obs(model)
-    groups = define_energy_groups(obs)
-    return FluxPointEstimator(obs=obs, model=model, groups=groups, norm_n_values=3)
+    dataset = simulate_dataset(model)
+    groups = define_energy_groups(dataset)
+    return FluxPointEstimator(datasets=[dataset], model=model, groups=groups, norm_n_values=3)


 @pytest.fixture(scope="session")

@@ -56,15 +59,6 @@ class TestFluxPointEstimator:
     def test_str(fpe_pwl):
         assert "FluxPointEstimator" in str(fpe_pwl)

-    @staticmethod
-    @requires_dependency("iminuit")
-    def test_energy_range(fpe_pwl):
-        group = fpe_pwl.groups[1]
-        fpe_pwl.estimate_flux_point(group)
-        fit_range = fpe_pwl.fit.true_fit_range[0]
-        assert_quantity_allclose(fit_range[0], group.energy_min)
-        assert_quantity_allclose(fit_range[1], group.energy_max)
-
     @staticmethod
     @requires_dependency("iminuit")
     def test_run_pwl(fpe_pwl):
9 changes: 8 additions & 1 deletion gammapy/utils/fitting/datasets.py
@@ -1,5 +1,6 @@
 # Licensed under a 3-clause BSD style license - see LICENSE.rst
 import abc
+import copy
 from collections import Counter
 import numpy as np
 from astropy.utils import lazyproperty

@@ -57,11 +58,13 @@ def __init__(self, datasets, mask=None):

         self.mask = mask

+    @lazyproperty
Contributor: Is it safe to make Datasets.parameters a lazyproperty?

Imagine you want to test another spectral model by doing something like this:

    for ds in datasets:
        ds.model = new_model

Will the datasets.parameters be updated accordingly?
Member Author: Good point. This is a problem that arises when we set / modify the models on datasets after init. I'll check again what makes sense here and then change it consistently on all dataset classes.
Member Author: I've looked into this a bit, and currently there is the problem that we attach the covariance matrix to the Parameters object. If I declare Datasets.parameters as a normal property, I can't set the covariance anymore, because anytime the .parameters attribute is accessed a new merged Parameters object is created from the sub-datasets and the covariance info is lost (this is the error we see on Travis-CI).

For the parallel evaluation of datasets we have to re-think the parameter handling and distribution of parameters in the Datasets class anyway. So for now I'll probably leave it as is with a lazyproperty, but we have to come up with a better solution and documentation for how to set a global model for Datasets or how to handle the covariance.
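The staleness the reviewer worries about is easy to demonstrate with a minimal stand-in for astropy's lazyproperty (a simplification: the real astropy descriptor also supports assignment, but the compute-once caching shown here is the relevant part):

    class lazyproperty:
        """Minimal stand-in: compute once on first access, then cache."""
        def __init__(self, func):
            self.func = func
        def __get__(self, obj, owner=None):
            if obj is None:
                return self
            value = self.func(obj)
            obj.__dict__[self.func.__name__] = value  # cache shadows descriptor
            return value

    class Demo:
        def __init__(self, models):
            self.models = models
        @lazyproperty
        def parameters(self):
            return [p for m in self.models for p in m]

    d = Demo(models=[["index", "amplitude"]])
    print(d.parameters)     # computed and cached: ['index', 'amplitude']
    d.models = [["lon_0"]]  # swap the model afterwards...
    print(d.parameters)     # ...still ['index', 'amplitude'] -- stale cache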

+    def parameters(self):
         # join parameter lists
         parameters = []
         for dataset in self.datasets:
             parameters += dataset.parameters.parameters
-        self.parameters = Parameters(parameters)
+        return Parameters(parameters)

     @property
     def datasets(self):

@@ -105,3 +108,7 @@ def __str__(self):
             str_ += "\t{key}: {value} \n".format(key=key, value=value)

         return str_
+
+    def copy(self):
+        """A deep copy."""
+        return copy.deepcopy(self)
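The new copy() is what backs the defensive Datasets(datasets).copy() call in FluxPointEstimator.__init__: the estimator attaches its ScaleModel to a deep copy, so caller-owned datasets keep their original models. The pattern in miniature (plain Python, independent of gammapy):

    import copy

    class Container:
        def __init__(self, items):
            self.items = list(items)
        def copy(self):
            """A deep copy."""
            return copy.deepcopy(self)

    original = Container([{"model": "input"}])
    working = original.copy()
    working.items[0]["model"] = "scaled"  # estimator-side mutation
    print(original.items[0]["model"])     # still 'input'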