Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensure metric results are JSON-serializable #10632

Merged
merged 1 commit into from
Mar 11, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/transformers/trainer.py
Expand Up @@ -98,6 +98,7 @@
TrainOutput,
default_compute_objective,
default_hp_space,
denumpify_detensorize,
get_last_checkpoint,
set_seed,
speed_metrics,
Expand Down Expand Up @@ -1824,6 +1825,9 @@ def prediction_loop(
else:
metrics = {}

# To be JSON-serializable, we need to remove numpy types or zero-d tensors
metrics = denumpify_detensorize(metrics)

if eval_loss is not None:
metrics[f"{metric_key_prefix}_loss"] = eval_loss.mean().item()

Expand Down
26 changes: 22 additions & 4 deletions src/transformers/trainer_utils.py
Expand Up @@ -38,6 +38,13 @@
)


if is_torch_available():
import torch

if is_tf_available():
import tensorflow as tf


def set_seed(seed: int):
"""
Helper function for reproducible behavior to set the seed in ``random``, ``numpy``, ``torch`` and/or ``tf`` (if
Expand All @@ -49,14 +56,10 @@ def set_seed(seed: int):
random.seed(seed)
np.random.seed(seed)
if is_torch_available():
import torch

torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
# ^^ safe to call this function even if cuda is not available
if is_tf_available():
import tensorflow as tf

tf.random.set_seed(seed)


Expand Down Expand Up @@ -423,6 +426,21 @@ def stop_and_update_metrics(self, metrics=None):
self.update_metrics(stage, metrics)


def denumpify_detensorize(metrics):
    """
    Recursively convert numpy scalars and single-element torch tensors into plain Python values.

    Lists, tuples (including namedtuples) and dicts are rebuilt with their original type, with every contained
    value converted in turn. Numpy scalars (``np.generic``) and torch tensors holding exactly one element are
    replaced by the result of their ``.item()`` method. Any other object is returned unchanged.

    Args:
        metrics: The object to convert; typically a (possibly nested) dictionary of metric values.

    Returns:
        An object with the same structure as ``metrics`` in which numpy scalars and one-element tensors have been
        replaced by the value returned by ``.item()``.
    """
    if isinstance(metrics, tuple) and hasattr(metrics, "_fields"):
        # Namedtuple constructors take one positional argument per field rather than a single iterable, so the
        # converted fields have to be unpacked.
        return type(metrics)(*(denumpify_detensorize(m) for m in metrics))
    if isinstance(metrics, (list, tuple)):
        return type(metrics)(denumpify_detensorize(m) for m in metrics)
    if isinstance(metrics, dict):
        return type(metrics)({k: denumpify_detensorize(v) for k, v in metrics.items()})
    if isinstance(metrics, np.generic):
        return metrics.item()
    # `torch` is only imported when it is available, so check availability before touching the name.
    if is_torch_available() and isinstance(metrics, torch.Tensor) and metrics.numel() == 1:
        return metrics.item()
    return metrics


class ShardedDDPOption(ExplicitEnum):
SIMPLE = "simple"
ZERO_DP_2 = "zero_dp_2"
Expand Down