Skip to content

Commit

Permalink
FIX overwriting metadata when both verified and unverified reported values (#1186)
Browse files Browse the repository at this point in the history

* FIX overwriting metadata when both verified and unverified reported values

* fix error message
  • Loading branch information
Wauplin committed Nov 15, 2022
1 parent 711f688 commit 131fd35
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 73 deletions.
41 changes: 16 additions & 25 deletions src/huggingface_hub/repocard.py
Original file line number Diff line number Diff line change
Expand Up @@ -788,47 +788,38 @@ def metadata_update(
else:
existing_results = card.data.eval_results

# Iterate over new results
# Iterate over existing results
# If both results describe the same metric but value is different:
# If overwrite=True: overwrite the metric value
# Else: raise ValueError
# Else: append new result to existing ones.
for new_result in new_results:
result_found = False
for existing_result_index, existing_result in enumerate(
existing_results
):
if all(
[
new_result.dataset_name == existing_result.dataset_name,
new_result.dataset_type == existing_result.dataset_type,
new_result.task_type == existing_result.task_type,
new_result.task_name == existing_result.task_name,
new_result.metric_name == existing_result.metric_name,
new_result.metric_type == existing_result.metric_type,
]
):
if (
new_result.metric_value != existing_result.metric_value
and not overwrite
):
existing_str = (
f"name: {new_result.metric_name}, type:"
f" {new_result.metric_type}"
)
for existing_result in existing_results:
if new_result.is_equal_except_value(existing_result):
if new_result != existing_result and not overwrite:
raise ValueError(
"You passed a new value for the existing metric"
f" '{existing_str}'. Set `overwrite=True` to"
" overwrite existing metrics."
f" 'name: {new_result.metric_name}, type: "
f"{new_result.metric_type}'. Set `overwrite=True`"
" to overwrite existing metrics."
)
result_found = True
card.data.eval_results[existing_result_index] = new_result
existing_result.metric_value = new_result.metric_value
if not result_found:
card.data.eval_results.append(new_result)
else:
# Any metadata that is not a result metric
if (
hasattr(card.data, key)
and getattr(card.data, key) is not None
and not overwrite
and getattr(card.data, key) != value
):
raise ValueError(
f"""You passed a new value for the existing meta data field '{key}'. Set `overwrite=True` to overwrite existing metadata."""
f"You passed a new value for the existing meta data field '{key}'."
" Set `overwrite=True` to overwrite existing metadata."
)
else:
setattr(card.data, key, value)
Expand Down
12 changes: 12 additions & 0 deletions src/huggingface_hub/repocard_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,18 @@ class EvalResult:
# A JSON Web Token that is used to verify whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not.
verify_token: Optional[str] = None

def is_equal_except_value(self, other: "EvalResult") -> bool:
"""
Return True if `self` and `other` describe exactly the same metric but with a
different value.
"""
for key, _ in self.__dict__.items():
if key == "metric_value":
continue
if getattr(self, key) != getattr(other, key):
return False
return True


@dataclass
class CardData:
Expand Down
133 changes: 85 additions & 48 deletions tests/test_repocard.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,43 @@
---
"""

DUMMY_MODELCARD_EVAL_RESULT_BOTH_VERIFIED_AND_UNVERIFIED = """---
model-index:
- name: RoBERTa fine-tuned on ReactionGIF
results:
- task:
type: text-classification
name: Text Classification
dataset:
name: ReactionGIF
type: julien-c/reactiongif
config: default
split: test
metrics:
- type: accuracy
value: 0.2662102282047272
name: Accuracy
config: default
verified: false
- task:
type: text-classification
name: Text Classification
dataset:
name: ReactionGIF
type: julien-c/reactiongif
config: default
split: test
metrics:
- type: accuracy
value: 0.6666666666666666
name: Accuracy
config: default
verified: true
---
This is a test model card.
"""

logger = logging.get_logger(__name__)

REPOCARD_DIR = os.path.join(
Expand Down Expand Up @@ -240,17 +277,18 @@ def setUpClass(cls):
def setUp(self) -> None:
self.repo_path = Path(tempfile.mkdtemp())
self.REPO_NAME = repo_name()
self._api.create_repo(f"{USER}/{self.REPO_NAME}")
self.repo_id = f"{USER}/{self.REPO_NAME}"
self._api.create_repo(self.repo_id)
self._api.upload_file(
path_or_fileobj=DUMMY_MODELCARD_EVAL_RESULT.encode(),
repo_id=f"{USER}/{self.REPO_NAME}",
repo_id=self.repo_id,
path_in_repo="README.md",
commit_message="Add README to main branch",
)

self.repo = Repository(
self.repo_path / self.REPO_NAME,
clone_from=f"{USER}/{self.REPO_NAME}",
clone_from=self.repo_id,
use_auth_token=self._token,
git_user="ci",
git_email="ci@dummy.com",
Expand All @@ -260,14 +298,12 @@ def setUp(self) -> None:
)

def tearDown(self) -> None:
self._api.delete_repo(repo_id=f"{self.REPO_NAME}")
self._api.delete_repo(repo_id=self.repo_id)
shutil.rmtree(self.repo_path)

def test_update_dataset_name(self):
new_datasets_data = {"datasets": ["test/test_dataset"]}
metadata_update(
f"{USER}/{self.REPO_NAME}", new_datasets_data, token=self._token
)
metadata_update(self.repo_id, new_datasets_data, token=self._token)

self.repo.git_pull()
updated_metadata = metadata_load(self.repo_path / self.REPO_NAME / "README.md")
Expand All @@ -280,9 +316,7 @@ def test_update_existing_result_with_overwrite(self):
new_metadata["model-index"][0]["results"][0]["metrics"][0][
"value"
] = 0.2862102282047272
metadata_update(
f"{USER}/{self.REPO_NAME}", new_metadata, token=self._token, overwrite=True
)
metadata_update(self.repo_id, new_metadata, token=self._token, overwrite=True)

self.repo.git_pull()
updated_metadata = metadata_load(self.repo_path / self.REPO_NAME / "README.md")
Expand All @@ -293,14 +327,12 @@ def test_metadata_update_upstream(self):
new_metadata["model-index"][0]["results"][0]["metrics"][0]["value"] = 0.1

path = hf_hub_download(
f"{USER}/{self.REPO_NAME}",
self.repo_id,
filename=REPOCARD_NAME,
use_auth_token=self._token,
)

metadata_update(
f"{USER}/{self.REPO_NAME}", new_metadata, token=self._token, overwrite=True
)
metadata_update(self.repo_id, new_metadata, token=self._token, overwrite=True)

self.assertNotEqual(metadata_load(path), new_metadata)
self.assertEqual(metadata_load(path), self.existing_metadata)
Expand All @@ -319,17 +351,12 @@ def test_update_existing_result_without_overwrite(self):
),
):
metadata_update(
f"{USER}/{self.REPO_NAME}",
new_metadata,
token=self._token,
overwrite=False,
self.repo_id, new_metadata, token=self._token, overwrite=False
)

def test_update_existing_field_without_overwrite(self):
new_datasets_data = {"datasets": "['test/test_dataset']"}
metadata_update(
f"{USER}/{self.REPO_NAME}", new_datasets_data, token=self._token
)
metadata_update(self.repo_id, new_datasets_data, token=self._token)

with pytest.raises(
ValueError,
Expand All @@ -340,7 +367,7 @@ def test_update_existing_field_without_overwrite(self):
):
new_datasets_data = {"datasets": "['test/test_dataset_2']"}
metadata_update(
f"{USER}/{self.REPO_NAME}",
self.repo_id,
new_datasets_data,
token=self._token,
overwrite=False,
Expand All @@ -362,9 +389,7 @@ def test_update_new_result_existing_dataset(self):
dataset_split="test",
)

metadata_update(
f"{USER}/{self.REPO_NAME}", new_result, token=self._token, overwrite=False
)
metadata_update(self.repo_id, new_result, token=self._token, overwrite=False)

expected_metadata = copy.deepcopy(self.existing_metadata)
expected_metadata["model-index"][0]["results"][0]["metrics"].append(
Expand All @@ -391,9 +416,7 @@ def test_update_new_result_new_dataset(self):
dataset_split="test",
)

metadata_update(
f"{USER}/{self.REPO_NAME}", new_result, token=self._token, overwrite=False
)
metadata_update(self.repo_id, new_result, token=self._token, overwrite=False)

expected_metadata = copy.deepcopy(self.existing_metadata)
expected_metadata["model-index"][0]["results"].append(
Expand All @@ -412,7 +435,7 @@ def test_update_metadata_on_empty_text_content(self) -> None:
with self.repo.commit("Add README to main branch"):
with open("README.md", "w") as f:
f.write(DUMMY_MODELCARD_NO_TEXT_CONTENT)
metadata_update(f"{USER}/{self.REPO_NAME}", {"tag": "test"}, token=self._token)
metadata_update(self.repo_id, {"tag": "test"}, token=self._token)

# Check update went fine
self.repo.git_pull()
Expand All @@ -426,43 +449,57 @@ def test_update_with_existing_name(self):
new_metadata["model-index"][0]["results"][0]["metrics"][0][
"value"
] = 0.2862102282047272
metadata_update(self.repo_id, new_metadata, token=self._token, overwrite=True)

metadata_update(
f"{USER}/{self.REPO_NAME}",
new_metadata,
token=self._token,
overwrite=True,
)

card_data = ModelCard.load(f"{USER}/{self.REPO_NAME}", token=self._token)

card_data = ModelCard.load(self.repo_id, token=self._token)
self.assertEqual(
card_data.data.model_name, self.existing_metadata["model-index"][0]["name"]
)

def test_update_without_existing_name(self):

# delete existing metadata
self._api.upload_file(
path_or_fileobj="# Test".encode(),
repo_id=f"{USER}/{self.REPO_NAME}",
repo_id=self.repo_id,
path_in_repo="README.md",
commit_message="Add README to main branch",
)

new_metadata = copy.deepcopy(self.existing_metadata)
new_metadata["model-index"][0].pop("name")

metadata_update(
f"{USER}/{self.REPO_NAME}",
new_metadata,
token=self._token,
overwrite=True,
metadata_update(self.repo_id, new_metadata, token=self._token, overwrite=True)

card_data = ModelCard.load(self.repo_id, token=self._token)

self.assertEqual(card_data.data.model_name, self.repo_id)

def test_update_with_both_verified_and_unverified_metric(self):
"""Regression test for #1185.
See https://github.com/huggingface/huggingface_hub/issues/1185.
"""
self._api.upload_file(
path_or_fileobj=DUMMY_MODELCARD_EVAL_RESULT_BOTH_VERIFIED_AND_UNVERIFIED.encode(),
repo_id=self.repo_id,
path_in_repo="README.md",
)
card = ModelCard.load(self.repo_id)
metadata = card.data.to_dict()
metadata_update(self.repo_id, metadata=metadata, overwrite=True, token=TOKEN)

card_data = ModelCard.load(self.repo_id, token=self._token)

self.assertEqual(len(card_data.data.eval_results), 2)
first_result = card_data.data.eval_results[0]
second_result = card_data.data.eval_results[1]

card_data = ModelCard.load(f"{USER}/{self.REPO_NAME}", token=self._token)
# One is verified, the other not
self.assertFalse(first_result.verified)
self.assertTrue(second_result.verified)

self.assertEqual(card_data.data.model_name, f"{USER}/{self.REPO_NAME}")
# Result values are different
self.assertEqual(first_result.metric_value, 0.2662102282047272)
self.assertEqual(second_result.metric_value, 0.6666666666666666)


class TestMetadataUpdateOnMissingCard(unittest.TestCase):
Expand Down

0 comments on commit 131fd35

Please sign in to comment.