Skip to content

Commit

Permalink
Improve sweep tag handling
Browse files Browse the repository at this point in the history
  • Loading branch information
cbalioglu committed Jul 5, 2024
1 parent a09d285 commit e016e26
Showing 1 changed file with 17 additions and 4 deletions.
21 changes: 17 additions & 4 deletions src/fairseq2/recipes/utils/sweep.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,14 @@ class SweepTagger:
"anomaly_detection",
"checkpoint_after_n_steps",
"checkpoint_every_n_steps",
"compute_scores",
"decode_capacity_increment",
"keep_best_n_checkpoints",
"keep_last_n_checkpoints",
"max_num_data_epochs",
"monitored_gang",
"num_prefetch",
"prefill_chunk_size",
"profile",
"publish_metrics_after_n_steps",
"publish_metrics_every_n_steps",
Expand Down Expand Up @@ -110,7 +113,13 @@ def generate_from_diff(preset_config: DataClass, config: DataClass) -> None:

s = ".".join(output)

return s[:128] # Cap to maximum 128 characters.
# Cap to maximum of 128 characters.
if len(s) > 128:
# Make sure we avoid name conflicts by appending the hash of the
# whole tag to the truncated one.
s = s[:120] + self._hash(s)

return s

@classmethod
def _to_tag_value(cls, value: Any) -> Optional[str]:
Expand All @@ -122,9 +131,7 @@ def _to_tag_value(cls, value: Any) -> Optional[str]:
if len(s) < 16:
return s

s = sha1(s.encode("utf-8")).hexdigest()

return s[:8]
return cls._hash(s)

if isinstance(value, bool):
return "t" if value else "f"
Expand Down Expand Up @@ -177,5 +184,11 @@ def _to_tag_value(cls, value: Any) -> Optional[str]:
def _remove_non_word(s: str) -> str:
    """Strip every character that is not a word character, ``-``, or ``_``."""
    non_word = re.compile(r"[^-_\w]")

    return non_word.sub("", s)

@staticmethod
def _hash(s: str) -> str:
s = sha1(s.encode("utf-8")).hexdigest()

return s[:8]


# Module-level SweepTagger instance, constructed with no arguments.
default_sweep_tagger = SweepTagger()

0 comments on commit e016e26

Please sign in to comment.