Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AL-[1706] throw exception it commiting unchanged dataset #1535

Merged
merged 4 commits into from
Mar 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions hub/core/dataset/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import json
from tqdm import tqdm # type: ignore
import posixpath

from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union

import hub
Expand Down Expand Up @@ -57,6 +58,7 @@
TensorGroupAlreadyExistsError,
ReadOnlyModeError,
NotLoggedInError,
EmptyCommitError,
)
from hub.util.keys import (
dataset_exists,
Expand All @@ -73,7 +75,7 @@
auto_checkout,
checkout,
commit,
current_commit_has_data,
current_commit_has_change,
load_meta,
warn_node_checkout,
load_version_info,
Expand Down Expand Up @@ -594,20 +596,27 @@ def __del__(self):
except Exception: # python shutting down
pass

def commit(self, message: Optional[str] = None) -> str:
def commit(self, message: Optional[str] = None, allow_empty=False) -> str:
"""Stores a snapshot of the current state of the dataset.
Note: Commiting from a non-head node in any branch, will lead to an auto checkout to a new branch.
This same behaviour will happen if new samples are added or existing samples are updated from a non-head node.

Args:
message (str, optional): Used to describe the commit.
allow_empty (bool): If True, commit even if there are no changes

Returns:
str: the commit id of the stored commit that can be used to access the snapshot.

Raises:
Exception: if dataset is a filtered view.
EmptyCommitError: if there are no changes and user does not forced to commit unchanged data
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
EmptyCommitError: if there are no changes and user does not forced to commit unchanged data
EmptyCommitError: if there are no changes and `allow_empty` is not True.

"""
if not allow_empty and not self.has_head_changes:
raise EmptyCommitError(
"There are no changes, commit is not done. Try again with allow_empty=True."
)

return self._commit(message)

def _commit(self, message: Optional[str] = None, hash: Optional[str] = None) -> str:
Expand Down Expand Up @@ -811,7 +820,7 @@ def read_only(self):
def has_head_changes(self):
"""Returns True if currently at head node and uncommitted changes are present."""
commit_node = self.version_state["commit_node"]
return not commit_node.children and current_commit_has_data(
return not commit_node.children and current_commit_has_change(
self.version_state, self.storage
)

Expand Down
37 changes: 29 additions & 8 deletions hub/core/version_control/test_version_control.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
ReadOnlyModeError,
InfoError,
TensorModifiedError,
EmptyCommitError,
)

NO_COMMIT_PASSED_DIFF = ""
Expand Down Expand Up @@ -60,6 +61,26 @@ def test_commit(local_ds):
local_ds.checkout(a, create=True)


"""
test for checking unchanged dataset commits
"""


def test_unchanged_commit(local_ds):
with local_ds:
local_ds.create_tensor("abc")
local_ds.abc.append(1)
local_ds.log()
a = local_ds.commit("first")
local_ds.checkout(a)
assert local_ds.commit_id == a
with pytest.raises(EmptyCommitError):
b = local_ds.commit("second")
c = local_ds.commit("third", allow_empty=True)
local_ds.checkout(c)
assert local_ds.commit_id == c


def test_commit_checkout(local_ds):
with local_ds:
local_ds.create_tensor("img")
Expand Down Expand Up @@ -402,7 +423,7 @@ def test_dataset_info(local_ds):
assert len(local_ds.info) == 1
assert local_ds.info.key == "value"

a = local_ds.commit("added key, value")
a = local_ds.commit("added key, value", allow_empty=True)
assert len(local_ds.info) == 1
assert local_ds.info.key == "value"

Expand All @@ -411,7 +432,7 @@ def test_dataset_info(local_ds):
assert local_ds.info.key == "value"
assert local_ds.info.key2 == "value2"

b = local_ds.commit("added key2, value2")
b = local_ds.commit("added key2, value2", allow_empty=True)
assert len(local_ds.info) == 2
assert local_ds.info.key == "value"
assert local_ds.info.key2 == "value2"
Expand All @@ -423,7 +444,7 @@ def test_dataset_info(local_ds):
local_ds.info.key = "notvalue"
assert len(local_ds.info) == 1
assert local_ds.info.key == "notvalue"
c = local_ds.commit("changed key to notvalue")
c = local_ds.commit("changed key to notvalue", allow_empty=True)

local_ds.checkout(a)
assert len(local_ds.info) == 1
Expand Down Expand Up @@ -497,7 +518,7 @@ def test_delete(local_ds):
local_ds.abc.append(1)
a = local_ds.commit("first")
local_ds.delete_tensor("abc")
b = local_ds.commit("second")
b = local_ds.commit("second", allow_empty=True)
local_ds.checkout(a)
assert local_ds.abc[0].numpy() == 1
local_ds.checkout(b)
Expand All @@ -507,7 +528,7 @@ def test_delete(local_ds):
local_ds["x/y/z"].append(1)
c = local_ds.commit("third")
local_ds["x"].delete_tensor("y/z")
d = local_ds.commit("fourth")
d = local_ds.commit("fourth", allow_empty=True)
local_ds.checkout(c)
assert local_ds["x/y/z"][0].numpy() == 1
local_ds.checkout(d)
Expand Down Expand Up @@ -1182,15 +1203,15 @@ def test_commits(local_ds):
commits = local_ds.commits
assert len(commits) == 2
commit_details_helper(commits, local_ds)
local_ds.commit()
local_ds.commit(allow_empty=True)
commits = local_ds.commits
assert len(commits) == 3
commit_details_helper(commits, local_ds)
local_ds.checkout("main")
commits = local_ds.commits
assert len(commits) == 2
commit_details_helper(commits, local_ds)
local_ds.commit()
local_ds.commit(allow_empty=True)
commits = local_ds.commits
assert len(commits) == 3
commit_details_helper(commits, local_ds)
Expand Down Expand Up @@ -1280,7 +1301,7 @@ def test_modified_samples(memory_ds):

memory_ds.checkout(first_commit)
memory_ds.checkout("alt", create=True)
alt_commit = memory_ds.commit()
alt_commit = memory_ds.commit(allow_empty=True)

memory_ds.checkout("main")

Expand Down
4 changes: 4 additions & 0 deletions hub/util/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -603,6 +603,10 @@ class CommitError(VersionControlError):
pass


class EmptyCommitError(CommitError):
pass


class TensorModifiedError(Exception):
def __init__(self):
super().__init__(
Expand Down