Skip to content

Commit

Permalink
Update and add missing docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
LukasBeiske committed Nov 16, 2023
1 parent 195a942 commit 24d3090
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 52 deletions.
95 changes: 55 additions & 40 deletions ctapipe/reco/sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,41 +66,47 @@ class MLQualityQuery(QualityQuery):


class SKLearnReconstructor(Reconstructor):
"""Base Class for a Machine Learning Based Reconstructor.
"""
Base Class for a Machine Learning Based Reconstructor.
Keeps a dictionary of sklearn models; the current tools are designed
to train one model per telescope type.
"""

#: Name of the target column in training table
#: Name of the target table column for training.
target: str = ""

#: property predicted, overridden in baseclass
#: Property predicted, overridden in subclass.
property = None

prefix = traits.Unicode(
default_value=None,
allow_none=True,
help="Prefix for the output of this model. If None, ``model_cls`` is used.",
).tag(config=True)
features = traits.List(traits.Unicode(), help="Features to use for this model").tag(
features = traits.List(
traits.Unicode(), help="Features to use for this model."
).tag(config=True)
model_config = traits.Dict({}, help="kwargs for the sklearn model.").tag(
config=True
)
model_config = traits.Dict({}, help="kwargs for the sklearn model").tag(config=True)
model_cls = traits.Enum(
SUPPORTED_MODELS.keys(), default_value=None, allow_none=True
SUPPORTED_MODELS.keys(),
default_value=None,
allow_none=True,
help="Which scikit-learn model to use.",
).tag(config=True)

stereo_combiner_cls = traits.ComponentName(
StereoCombiner,
default_value="StereoMeanCombiner",
help="Which stereo combination method to use",
help="Which stereo combination method to use.",
).tag(config=True)

load_path = traits.Path(
default_value=None,
allow_none=True,
help="If given, load serialized model from this path",
help="If given, load serialized model from this path.",
).tag(config=True)

def __init__(self, subarray=None, models=None, **kwargs):
Expand Down Expand Up @@ -155,7 +161,8 @@ def __init__(self, subarray=None, models=None, **kwargs):

@abstractmethod
def __call__(self, event: ArrayEventContainer) -> None:
"""Event-wise prediction for the EventSource-Loop.
"""
Event-wise prediction for the EventSource-Loop.
Fills the event.dl2.<your-feature>[name] container.
Expand All @@ -167,7 +174,7 @@ def __call__(self, event: ArrayEventContainer) -> None:
@abstractmethod
def predict_table(self, key, table: Table) -> Table:
"""
Predict on a table of events
Predict on a table of events.
Parameters
----------
Expand Down Expand Up @@ -203,7 +210,7 @@ def _new_model(self):

def _table_to_y(self, table, mask=None):
"""
Extract target values as numpy array from input table
Extract target values as numpy array from input table.
"""
# make sure we use the unit that was used during training
if self.unit is not None:
Expand All @@ -224,9 +231,7 @@ def fit(self, key, table):


class SKLearnRegressionReconstructor(SKLearnReconstructor):
"""
Base class for regression tasks
"""
"""Base class for regression tasks."""

model_cls = traits.Enum(
SUPPORTED_REGRESSORS.keys(),
Expand Down Expand Up @@ -280,9 +285,7 @@ def _table_to_y(self, table, mask=None):


class SKLearnClassificationReconstructor(SKLearnReconstructor):
"""
Base class for classification tasks
"""
"""Base class for classification tasks."""

model_cls = traits.Enum(
SUPPORTED_CLASSIFIERS.keys(),
Expand All @@ -292,7 +295,7 @@ class SKLearnClassificationReconstructor(SKLearnReconstructor):
).tag(config=True)

invalid_class = traits.Integer(
default_value=-1, help="The label to fill in case no prediction could be made"
default_value=-1, help="The label to fill in case no prediction could be made."
).tag(config=True)

positive_class = traits.Integer(
Expand Down Expand Up @@ -357,16 +360,15 @@ def _get_positive_index(self, key):

class EnergyRegressor(SKLearnRegressionReconstructor):
"""
Use a scikit-learn regression model per telescope type to predict primary energy
Use a scikit-learn regression model per telescope type to predict primary energy.
"""

#: Name of the target table column for training
target = "true_energy"
property = ReconstructionProperty.ENERGY

def __call__(self, event: ArrayEventContainer) -> None:
"""
Apply model for a single event and fill result into the event container
Apply model for a single event and fill result into the event container.
"""
for tel_id in event.trigger.tels_with_trigger:
table = collect_features(event, tel_id, self.instrument_table)
Expand Down Expand Up @@ -396,7 +398,7 @@ def __call__(self, event: ArrayEventContainer) -> None:
self.stereo_combiner(event)

def predict_table(self, key, table: Table) -> Dict[ReconstructionProperty, Table]:
"""Predict on a table of events"""
"""Predict on a table of events."""
table = self.feature_generator(table, subarray=self.subarray)

n_rows = len(table)
Expand All @@ -422,11 +424,8 @@ def predict_table(self, key, table: Table) -> Dict[ReconstructionProperty, Table


class ParticleClassifier(SKLearnClassificationReconstructor):
"""
Predict dl2 particle classification
"""
"""Predict dl2 particle classification."""

#: Name of the target table column for training
target = "true_shower_primary_id"

positive_class = traits.Integer(
Expand All @@ -437,6 +436,9 @@ class ParticleClassifier(SKLearnClassificationReconstructor):
property = ReconstructionProperty.PARTICLE_TYPE

def __call__(self, event: ArrayEventContainer) -> None:
"""
Apply model for a single event and fill result into the event container.
"""
for tel_id in event.trigger.tels_with_trigger:
table = collect_features(event, tel_id, self.instrument_table)
table = self.feature_generator(table, subarray=self.subarray)
Expand All @@ -463,7 +465,7 @@ def __call__(self, event: ArrayEventContainer) -> None:
self.stereo_combiner(event)

def predict_table(self, key, table: Table) -> Dict[ReconstructionProperty, Table]:
"""Predict on a table of events"""
"""Predict on a table of events."""
table = self.feature_generator(table, subarray=self.subarray)

n_rows = len(table)
Expand Down Expand Up @@ -495,18 +497,26 @@ class DispReconstructor(Reconstructor):

target = "true_disp"

prefix = traits.Unicode(default_value="disp", allow_none=False).tag(config=True)
prefix = traits.Unicode(
default_value="disp",
allow_none=False,
help="Prefix for the output of this model. Defaults to ``disp``.",
).tag(config=True)

features = traits.List(
traits.Unicode(), help="Features to use for both models"
traits.Unicode(), help="Features to use for both models."
).tag(config=True)

log_target = traits.Bool(
default_value=False,
help="If True, the model is trained to predict the natural logarithm of the absolute value.",
help=(
"If True, the norm(disp) model is trained to predict ln(norm(disp))"
" and the output is"
" ``prefix_parameter`` = ``sign_prediction`` * ``exp(norm_prediction)``."
),
).tag(config=True)

norm_config = traits.Dict({}, help="kwargs for the sklearn regressor").tag(
norm_config = traits.Dict({}, help="kwargs for the sklearn regressor.").tag(
config=True
)

Expand All @@ -517,7 +527,7 @@ class DispReconstructor(Reconstructor):
help="Which scikit-learn regression model to use.",
).tag(config=True)

sign_config = traits.Dict({}, help="kwargs for the sklearn classifier").tag(
sign_config = traits.Dict({}, help="kwargs for the sklearn classifier.").tag(
config=True
)

Expand All @@ -531,13 +541,13 @@ class DispReconstructor(Reconstructor):
stereo_combiner_cls = traits.ComponentName(
StereoCombiner,
default_value="StereoMeanCombiner",
help="Which stereo combination method to use",
help="Which stereo combination method to use.",
).tag(config=True)

load_path = traits.Path(
default_value=None,
allow_none=True,
help="If given, load serialized model from this path",
help="If given, load serialized model from this path.",
).tag(config=True)

def __init__(self, subarray=None, models=None, **kwargs):
Expand Down Expand Up @@ -590,7 +600,7 @@ def _new_models(self):

def _table_to_y(self, table, mask=None):
"""
Extract target values as numpy array from input table
Extract target values as numpy array from input table.
"""
# make sure we use the unit that was used during training
if self.unit is not None:
Expand Down Expand Up @@ -673,7 +683,8 @@ def _predict(self, key, table):
return prediction, valid

def __call__(self, event: ArrayEventContainer) -> None:
"""Event-wise prediction for the EventSource-Loop.
"""
Event-wise prediction for the EventSource-Loop.
Fills the event.dl2.tel[tel_id].disp[prefix] container
and event.dl2.tel[tel_id].geometry[prefix] container.
Expand Down Expand Up @@ -739,7 +750,8 @@ def __call__(self, event: ArrayEventContainer) -> None:
self.stereo_combiner(event)

def predict_table(self, key, table: Table) -> Dict[ReconstructionProperty, Table]:
"""Predict on a table of events
"""
Predict on a table of events.
Parameters
----------
Expand Down Expand Up @@ -805,9 +817,11 @@ def predict_table(self, key, table: Table) -> Dict[ReconstructionProperty, Table


class CrossValidator(Component):
"""Class to train sklearn based reconstructors in a cross validation"""
"""Class to train sklearn-based reconstructors using cross validation."""

n_cross_validations = traits.Int(5).tag(config=True)
n_cross_validations = traits.Int(
default_value=5, help="Number of cross validation iterations."
).tag(config=True)

output_path = traits.Path(
default_value=None,
Expand All @@ -822,7 +836,7 @@ class CrossValidator(Component):
).tag(config=True)

rng_seed = traits.Int(
default_value=1337, help="Seed for the random number generator"
default_value=1337, help="Random seed for splitting the training data."
).tag(config=True)

def __init__(self, model_component, **kwargs):
Expand All @@ -846,6 +860,7 @@ def __init__(self, model_component, **kwargs):
)

def __call__(self, telescope_type, table):
"""Perform cross validation for the given model."""
if self.n_cross_validations == 0:
return

Expand Down
20 changes: 12 additions & 8 deletions ctapipe/reco/stereo_combination.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,22 +63,24 @@ def _weighted_mean_ufunc(tel_values, weights, n_array_events, indices):


class StereoCombiner(Component):
"""Base Class for algorithms combining telescope-wise predictions to common prediction"""
"""
Base class for algorithms combining telescope-wise predictions into a common prediction.
"""

prefix = Unicode(
default_value="",
help="Prefix to be added to the output container / column names",
help="Prefix to be added to the output container / column names.",
).tag(config=True)

property = UseEnum(
ReconstructionProperty,
help="Which property is being combined",
help="Which property is being combined.",
).tag(config=True)

@abstractmethod
def __call__(self, event: ArrayEventContainer) -> None:
"""
Fill event container with stereo predictions
Fill event container with stereo predictions.
"""

@abstractmethod
Expand All @@ -91,17 +93,21 @@ def predict_table(self, mono_predictions: Table) -> Table:

class StereoMeanCombiner(StereoCombiner):
"""
Calculate array-event prediction as (weighted) mean of telescope-wise predictions
Calculate array-event prediction as (weighted) mean of telescope-wise predictions.
"""

weights = CaselessStrEnum(
["none", "intensity", "konrad"],
default_value="none",
help=(
"What kind of weights to use."
" Options: ``none``, ``intensity``, ``konrad``."
),
).tag(config=True)

log_target = Bool(
False,
help="If true, calculate exp(mean(log(values)))",
help="If true, calculate exp(mean(log(values))).",
).tag(config=True)

def __init__(self, *args, **kwargs):
Expand All @@ -118,8 +124,6 @@ def __init__(self, *args, **kwargs):
)

def _calculate_weights(self, data):
""""""

if isinstance(data, Container):
if self.weights == "intensity":
return data.hillas.intensity
Expand Down
2 changes: 1 addition & 1 deletion ctapipe/tools/train_disp_reconstructor.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class TrainDispReconstructor(Tool):
).tag(config=True)

random_seed = Int(
default_value=0, help="Random seed for sampling and cross validation"
default_value=0, help="Random seed for sampling training events."
).tag(config=True)

project_disp = Bool(
Expand Down
2 changes: 1 addition & 1 deletion ctapipe/tools/train_energy_regressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class TrainEnergyRegressor(Tool):
).tag(config=True)

random_seed = Int(
default_value=0, help="Random seed for sampling and cross validation"
default_value=0, help="Random seed for sampling training events."
).tag(config=True)

aliases = {
Expand Down
3 changes: 1 addition & 2 deletions ctapipe/tools/train_particle_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,7 @@ class TrainParticleClassifier(Tool):
).tag(config=True)

random_seed = Int(
default_value=0,
help="Random number seed for sampling and the cross validation splitting",
default_value=0, help="Random seed for sampling training events."
).tag(config=True)

aliases = {
Expand Down
1 change: 1 addition & 0 deletions docs/changes/2456.optimization.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Update and add missing docstrings related to the ML functionalities.

0 comments on commit 24d3090

Please sign in to comment.