diff --git a/examples/04_training/01_train_model.py b/examples/04_training/01_train_model.py
index 5b747a11d..2d82bf272 100644
--- a/examples/04_training/01_train_model.py
+++ b/examples/04_training/01_train_model.py
@@ -19,11 +19,6 @@
 from graphnet.utilities.logging import Logger
 
 
-# Make sure W&B output directory exists
-WANDB_DIR = "./wandb/"
-os.makedirs(WANDB_DIR, exist_ok=True)
-
-
 def main(
     dataset_config_path: str,
     model_config_path: str,
@@ -34,18 +29,23 @@
     num_workers: int,
     prediction_names: Optional[List[str]],
     suffix: Optional[str] = None,
+    wandb: bool = False,
 ) -> None:
     """Run example."""
     # Construct Logger
     logger = Logger()
 
     # Initialise Weights & Biases (W&B) run
-    wandb_logger = WandbLogger(
-        project="example-script",
-        entity="graphnet-team",
-        save_dir=WANDB_DIR,
-        log_model=True,
-    )
+    if wandb:
+        # Make sure W&B output directory exists
+        wandb_dir = "./wandb/"
+        os.makedirs(wandb_dir, exist_ok=True)
+        wandb_logger = WandbLogger(
+            project="example-script",
+            entity="graphnet-team",
+            save_dir=wandb_dir,
+            log_model=True,
+        )
 
     # Build model
     model_config = ModelConfig.load(model_config_path)
@@ -80,7 +80,7 @@ def main(
     # Log configurations to W&B
     # NB: Only log to W&B on the rank-zero process in case of multi-GPU
     # training.
-    if rank_zero_only.rank == 0:
+    if wandb and rank_zero_only.rank == 0:
         wandb_logger.experiment.config.update(config)
         wandb_logger.experiment.config.update(model_config.as_dict())
         wandb_logger.experiment.config.update(dataset_config.as_dict())
@@ -98,7 +98,7 @@ def main(
         dataloaders["train"],
         dataloaders["validation"],
         callbacks=callbacks,
-        logger=wandb_logger,
+        logger=wandb_logger if wandb else None,
         **config.fit,
     )
 
@@ -166,6 +166,12 @@ def main(
         default=None,
     )
 
+    parser.add_argument(
+        "--wandb",
+        action="store_true",
+        help="If set, use Weights & Biases to track the experiment.",
+    )
+
     args = parser.parse_args()
 
     main(
@@ -178,4 +184,5 @@ def main(
         args.num_workers,
         args.prediction_names,
         args.suffix,
+        args.wandb,
     )
diff --git a/examples/04_training/02_train_model_without_configs.py b/examples/04_training/02_train_model_without_configs.py
index d1de63780..27e112ce5 100644
--- a/examples/04_training/02_train_model_without_configs.py
+++ b/examples/04_training/02_train_model_without_configs.py
@@ -25,10 +25,6 @@
 features = FEATURES.PROMETHEUS
 truth = TRUTH.PROMETHEUS
 
-# Make sure W&B output directory exists
-WANDB_DIR = "./wandb/"
-os.makedirs(WANDB_DIR, exist_ok=True)
-
 
 def main(
     path: str,
@@ -40,18 +36,23 @@
     early_stopping_patience: int,
     batch_size: int,
     num_workers: int,
+    wandb: bool = False,
 ) -> None:
     """Run example."""
     # Construct Logger
     logger = Logger()
 
     # Initialise Weights & Biases (W&B) run
-    wandb_logger = WandbLogger(
-        project="example-script",
-        entity="graphnet-team",
-        save_dir=WANDB_DIR,
-        log_model=True,
-    )
+    if wandb:
+        # Make sure W&B output directory exists
+        wandb_dir = "./wandb/"
+        os.makedirs(wandb_dir, exist_ok=True)
+        wandb_logger = WandbLogger(
+            project="example-script",
+            entity="graphnet-team",
+            save_dir=wandb_dir,
+            log_model=True,
+        )
 
     logger.info(f"features: {features}")
     logger.info(f"truth: {truth}")
@@ -72,9 +73,9 @@
 
     archive = os.path.join(EXAMPLE_OUTPUT_DIR, "train_model_without_configs")
     run_name = "dynedge_{}_example".format(config["target"])
-
-    # Log configuration to W&B
-    wandb_logger.experiment.config.update(config)
+    if wandb:
+        # Log configuration to W&B
+        wandb_logger.experiment.config.update(config)
 
     (
         training_dataloader,
@@ -137,17 +138,16 @@ def main(
         training_dataloader,
         validation_dataloader,
         callbacks=callbacks,
-        logger=wandb_logger,
+        logger=wandb_logger if wandb else None,
         **config["fit"],
     )
 
     # Get predictions
-    prediction_columns = [config["target"] + "_pred"]
-    additional_attributes = [config["target"]]
+    additional_attributes = model.target_labels
+    assert isinstance(additional_attributes, list)  # mypy
 
     results = model.predict_as_dataframe(
         validation_dataloader,
-        prediction_columns=prediction_columns,
         additional_attributes=additional_attributes + ["event_no"],
     )
 
@@ -206,6 +206,12 @@ def main(
         "num-workers",
     )
 
+    parser.add_argument(
+        "--wandb",
+        action="store_true",
+        help="If set, use Weights & Biases to track the experiment.",
+    )
+
     args = parser.parse_args()
 
     main(
@@ -218,4 +224,5 @@ def main(
         args.early_stopping_patience,
         args.batch_size,
         args.num_workers,
+        args.wandb,
     )
diff --git a/examples/04_training/01_train_models.sh b/examples/04_training/03_train_multiple_models.sh
similarity index 100%
rename from examples/04_training/01_train_models.sh
rename to examples/04_training/03_train_multiple_models.sh
diff --git a/src/graphnet/models/coarsening.py b/src/graphnet/models/coarsening.py
index 020ebdd19..68eab50b9 100644
--- a/src/graphnet/models/coarsening.py
+++ b/src/graphnet/models/coarsening.py
@@ -28,7 +28,7 @@
 from torch_geometric.utils import degree
 
 # NOTE: From [https://github.com/pyg-team/pytorch_geometric/pull/4903]
-# TODO: Remove once bumping to torch_geometric>=2.1.0
+# TODO: Remove once bumping to torch_geometric>=2.1.0
 # See [https://github.com/pyg-team/pytorch_geometric/blob/master/CHANGELOG.md]
diff --git a/src/graphnet/models/model.py b/src/graphnet/models/model.py
index cafef6a65..cae08b4d4 100644
--- a/src/graphnet/models/model.py
+++ b/src/graphnet/models/model.py
@@ -10,6 +10,7 @@
 import pandas as pd
 from pytorch_lightning import Trainer, LightningModule
 from pytorch_lightning.callbacks.callback import Callback
+from pytorch_lightning.callbacks import EarlyStopping
 from pytorch_lightning.loggers.logger import Logger as LightningLogger
 import torch
 from torch import Tensor
@@ -18,6 +19,7 @@
 
 from graphnet.utilities.logging import Logger
 from graphnet.utilities.config import Configurable, ModelConfig
+from graphnet.training.callbacks import ProgressBar
 
 
 class Model(Logger, Configurable, LightningModule, ABC):
@@ -88,8 +90,17 @@ def fit(
         **trainer_kwargs: Any,
     ) -> None:
         """Fit `Model` using `pytorch_lightning.Trainer`."""
-        self.train(mode=True)
+        # Checks
+        if callbacks is None:
+            callbacks = self._create_default_callbacks(
+                val_dataloader=val_dataloader,
+            )
+        elif val_dataloader is not None:
+            callbacks = self._add_early_stopping(
+                val_dataloader=val_dataloader, callbacks=callbacks
+            )
 
+        self.train(mode=True)
         self._construct_trainers(
             max_epochs=max_epochs,
             gpus=gpus,
@@ -110,6 +121,38 @@ def fit(
             self.warning("[ctrl+c] Exiting gracefully.")
             pass
 
+    def _create_default_callbacks(self, val_dataloader: DataLoader) -> List:
+        callbacks = [ProgressBar()]
+        callbacks = self._add_early_stopping(
+            val_dataloader=val_dataloader, callbacks=callbacks
+        )
+        return callbacks
+
+    def _add_early_stopping(
+        self, val_dataloader: DataLoader, callbacks: List
+    ) -> List:
+        if val_dataloader is None:
+            return callbacks
+        has_early_stopping = False
+        assert isinstance(callbacks, list)
+        for callback in callbacks:
+            if isinstance(callback, EarlyStopping):
+                has_early_stopping = True
+
+        if not has_early_stopping:
+            callbacks.append(
+                EarlyStopping(
+                    monitor="val_loss",
+                    patience=5,
+                )
+            )
+            self.warning_once(
+ "Got validation dataloader but no EarlyStopping callback. An " + "EarlyStopping callback has been added automatically with " + "patience=5 and monitor = 'val_loss'." + ) + return callbacks + def predict( self, dataloader: DataLoader, @@ -178,6 +221,7 @@ def predict_as_dataframe( "doesn't resample batches; or do not request " "`additional_attributes`." ) + self.info(f"Column names for predictions are: \n {prediction_columns}") predictions_torch = self.predict( dataloader=dataloader, gpus=gpus, diff --git a/src/graphnet/models/standard_model.py b/src/graphnet/models/standard_model.py index af51ddc19..41b70bb26 100644 --- a/src/graphnet/models/standard_model.py +++ b/src/graphnet/models/standard_model.py @@ -8,6 +8,7 @@ from torch.optim import Adam from torch.utils.data import DataLoader from torch_geometric.data import Data +import pandas as pd from graphnet.models.coarsening import Coarsening from graphnet.utilities.config import save_model_config @@ -62,6 +63,18 @@ def __init__( self._scheduler_kwargs = scheduler_kwargs or dict() self._scheduler_config = scheduler_config or dict() + @property + def target_labels(self) -> List[str]: + """Return target label.""" + return [label for task in self._tasks for label in task._target_labels] + + @property + def prediction_labels(self) -> List[str]: + """Return prediction labels.""" + return [ + label for task in self._tasks for label in task._prediction_labels + ] + def configure_optimizers(self) -> Dict[str, Any]: """Configure the model's optimizer(s).""" optimizer = self._optimizer_class( @@ -175,3 +188,31 @@ def predict( gpus=gpus, distribution_strategy=distribution_strategy, ) + + def predict_as_dataframe( + self, + dataloader: DataLoader, + prediction_columns: Optional[List[str]] = None, + *, + node_level: bool = False, + additional_attributes: Optional[List[str]] = None, + index_column: str = "event_no", + gpus: Optional[Union[List[int], int]] = None, + distribution_strategy: Optional[str] = None, + ) -> pd.DataFrame: + """Return predictions for `dataloader` as a DataFrame. + + Include `additional_attributes` as additional columns in the output + DataFrame. + """ + if prediction_columns is None: + prediction_columns = self.prediction_labels + return super().predict_as_dataframe( + dataloader=dataloader, + prediction_columns=prediction_columns, + node_level=node_level, + additional_attributes=additional_attributes, + index_column=index_column, + gpus=gpus, + distribution_strategy=distribution_strategy, + ) diff --git a/src/graphnet/models/task/reconstruction.py b/src/graphnet/models/task/reconstruction.py index 13b83f7a4..52611839a 100644 --- a/src/graphnet/models/task/reconstruction.py +++ b/src/graphnet/models/task/reconstruction.py @@ -12,6 +12,8 @@ class AzimuthReconstructionWithKappa(Task): """Reconstructs azimuthal angle and associated kappa (1/var).""" # Requires two features: untransformed points in (x,y)-space. + default_target_labels = ["azimuth"] + default_prediction_labels = ["azimuth_pred", "azimuth_kappa"] nb_inputs = 2 def _forward(self, x: Tensor) -> Tensor: @@ -28,6 +30,8 @@ class AzimuthReconstruction(AzimuthReconstructionWithKappa): """Reconstructs azimuthal angle.""" # Requires two features: untransformed points in (x,y)-space. 
+ default_target_labels = ["azimuth"] + default_prediction_labels = ["azimuth_pred"] nb_inputs = 2 def _forward(self, x: Tensor) -> Tensor: @@ -46,6 +50,15 @@ class DirectionReconstructionWithKappa(Task): """Reconstructs direction with kappa from the 3D-vMF distribution.""" # Requires three features: untransformed points in (x,y,z)-space. + default_target_labels = [ + "direction" + ] # contains dir_x, dir_y, dir_z see https://github.com/graphnet-team/graphnet/blob/95309556cfd46a4046bc4bd7609888aab649e295/src/graphnet/training/labels.py#L29 + default_prediction_labels = [ + "dir_x_pred", + "dir_y_pred", + "dir_z_pred", + "direction_kappa", + ] nb_inputs = 3 def _forward(self, x: Tensor) -> Tensor: @@ -61,6 +74,8 @@ class ZenithReconstruction(Task): """Reconstructs zenith angle.""" # Requires two features: zenith angle itself. + default_target_labels = ["zenith"] + default_prediction_labels = ["zenith_pred"] nb_inputs = 1 def _forward(self, x: Tensor) -> Tensor: @@ -72,6 +87,8 @@ class ZenithReconstructionWithKappa(ZenithReconstruction): """Reconstructs zenith angle and associated kappa (1/var).""" # Requires one feature in addition to `ZenithReconstruction`: kappa (unceratinty; 1/variance). + default_target_labels = ["zenith"] + default_prediction_labels = ["zenith_pred", "zenith_kappa"] nb_inputs = 2 def _forward(self, x: Tensor) -> Tensor: @@ -85,6 +102,8 @@ class EnergyReconstruction(Task): """Reconstructs energy using stable method.""" # Requires one feature: untransformed energy + default_target_labels = ["energy"] + default_prediction_labels = ["energy_pred"] nb_inputs = 1 def _forward(self, x: Tensor) -> Tensor: @@ -97,6 +116,8 @@ class EnergyReconstructionWithPower(Task): """Reconstructs energy.""" # Requires one feature: untransformed energy + default_target_labels = ["energy"] + default_prediction_labels = ["energy_pred"] nb_inputs = 1 def _forward(self, x: Tensor) -> Tensor: @@ -108,6 +129,8 @@ class EnergyReconstructionWithUncertainty(EnergyReconstruction): """Reconstructs energy and associated uncertainty (log(var)).""" # Requires one feature in addition to `EnergyReconstruction`: log-variance (uncertainty). + default_target_labels = ["energy"] + default_prediction_labels = ["energy_pred", "energy_sigma"] nb_inputs = 2 def _forward(self, x: Tensor) -> Tensor: @@ -122,6 +145,13 @@ class VertexReconstruction(Task): """Reconstructs vertex position and time.""" # Requires four features, x, y, z, and t. + default_target_labels = ["vertex"] + default_prediction_labels = [ + "position_x_pred", + "position_y_pred", + "position_z_pred", + "interaction_time_pred", + ] nb_inputs = 4 def _forward(self, x: Tensor) -> Tensor: @@ -138,6 +168,12 @@ class PositionReconstruction(Task): """Reconstructs vertex position.""" # Requires three features, x, y, and z. + default_target_labels = ["position"] + default_prediction_labels = [ + "position_x_pred", + "position_y_pred", + "position_z_pred", + ] nb_inputs = 3 def _forward(self, x: Tensor) -> Tensor: @@ -154,6 +190,8 @@ class TimeReconstruction(Task): """Reconstructs time.""" # Requires one feature, time. 
+ default_target_labels = ["interaction_time"] + default_prediction_labels = ["interaction_time_pred"] nb_inputs = 1 def _forward(self, x: Tensor) -> Tensor: @@ -169,6 +207,8 @@ class InelasticityReconstruction(Task): """ # Requires one features: inelasticity itself + default_target_labels = ["elasticity"] + default_prediction_labels = ["elasticity_pred"] nb_inputs = 1 def _forward(self, x: Tensor) -> Tensor: diff --git a/src/graphnet/models/task/task.py b/src/graphnet/models/task/task.py index 5a95d168e..4bcf44171 100644 --- a/src/graphnet/models/task/task.py +++ b/src/graphnet/models/task/task.py @@ -27,13 +27,26 @@ class Task(Model): def nb_inputs(self) -> int: """Return number of inputs assumed by task.""" + @property + @abstractmethod + def default_target_labels(self) -> List[str]: + """Return default target labels.""" + return self._default_target_labels + + @property + @abstractmethod + def default_prediction_labels(self) -> List[str]: + """Return default prediction labels.""" + return self._default_prediction_labels + @save_model_config def __init__( self, *, hidden_size: int, - target_labels: Union[str, List[str]], loss_function: "LossFunction", + target_labels: Optional[Union[str, List[str]]] = None, + prediction_labels: Optional[Union[str, List[str]]] = None, transform_prediction_and_target: Optional[Callable] = None, transform_target: Optional[Callable] = None, transform_inference: Optional[Callable] = None, @@ -46,10 +59,13 @@ def __init__( hidden_size: The number of nodes in the layer feeding into this tasks, used to construct the affine transformation to the predicted quantity. + loss_function: Loss function appropriate to the task. target_labels: Name(s) of the quantity/-ies being predicted, used to extract the target tensor(s) from the `Data` object in `.compute_loss(...)`. - loss_function: Loss function appropriate to the task. + prediction_labels: The name(s) of each column that is predicted by + the model during inference. If not given, the name will auto + matically be set to `target_label + _pred`. transform_prediction_and_target: Optional function to transform both the predicted and target tensor before passing them to the loss function. Useful e.g. for having the model predict @@ -76,14 +92,23 @@ def __init__( """ # Base class constructor super().__init__() - # Check(s) + if target_labels is None: + target_labels = self.default_target_labels if isinstance(target_labels, str): target_labels = [target_labels] + if prediction_labels is None: + prediction_labels = self.default_prediction_labels + if isinstance(prediction_labels, str): + prediction_labels = [prediction_labels] + + assert isinstance(target_labels, List) # mypy + assert isinstance(prediction_labels, List) # mypy # Member variables self._regularisation_loss: Optional[float] = None self._target_labels = target_labels + self._prediction_labels = prediction_labels self._loss_function = loss_function self._inference = False self._loss_weight = loss_weight