Skip to content

Commit

Permalink
improving tests
Browse files Browse the repository at this point in the history
  • Loading branch information
AbhinavTuli committed Mar 16, 2021
1 parent d88c5fb commit afd7bf5
Show file tree
Hide file tree
Showing 6 changed files with 85 additions and 18 deletions.
3 changes: 3 additions & 0 deletions hub/api/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -757,6 +757,9 @@ def to_tensorflow(self, indexes=None, include_shapes=False, key_list=None):
include_shapes: boolean, optional
False by default. Setting it to True passes the shapes to tf.data.Dataset.from_generator.
Setting to True could lead to issues with dictionaries inside Tensors.
key_list: list, optional
The list of keys that are needed in tensorflow format. For nested schemas such as {"a":{"b":{"c": Tensor()}}}
use ["a/b/c"] as key_list
"""
from .integrations import _to_tensorflow

Expand Down
3 changes: 3 additions & 0 deletions hub/api/datasetview.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,9 @@ def to_tensorflow(self, include_shapes=False, key_list=None):
include_shapes: boolean, optional
False by default. Setting it to True passes the shapes to tf.data.Dataset.from_generator.
Setting to True could lead to issues with dictionaries inside Tensors.
key_list: list, optional
The list of keys that are needed in tensorflow format. For nested schemas such as {"a":{"b":{"c": Tensor()}}}
use ["a/b/c"] as key_list
"""

return self.dataset.to_tensorflow(
Expand Down
4 changes: 2 additions & 2 deletions hub/api/tests/test_converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def test_from_tensorflow():
@pytest.mark.skipif(not tensorflow_loaded(), reason="requires tensorflow to be loaded")
def test_to_tensorflow():
schema = {"abc": Tensor((100, 100, 3)), "int": "uint32"}
ds = hub.Dataset("./data/test_to_tf", shape=(10,), schema=schema)
ds = hub.Dataset("./data/test_to_tf", shape=(10,), schema=schema, mode="w")
for i in range(10):
ds["abc", i] = i * np.ones((100, 100, 3))
ds["int", i] = i
Expand All @@ -111,7 +111,7 @@ def test_to_tensorflow_key_list():
},
"int": "uint32",
}
ds = hub.Dataset("./data/test_to_tf", shape=(10,), schema=schema, mode="w")
ds = hub.Dataset("./data/test_to_tf_key_list", shape=(10,), schema=schema, mode="w")
for i in range(10):
ds["abc/d", i] = i * np.ones((100, 100, 3))
ds["abc/e", i] = i * np.ones((100, 100, 3))
Expand Down
90 changes: 75 additions & 15 deletions hub/api/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -836,8 +836,18 @@ def test_dataset_copy_s3_local():
)
for i in range(100):
ds["num", i] = 2 * i
ds2 = ds.copy("s3://snark-test/cp_copy_data_s3_1_a")
ds3 = ds2.copy("./data/testing/cp_copy_data_local_1")
try:
ds2 = ds.copy("s3://snark-test/cp_copy_data_s3_1_a")
except:
dsi = Dataset("s3://snark-test/cp_copy_data_s3_1_a")
dsi.delete()
ds2 = ds.copy("s3://snark-test/cp_copy_data_s3_1_a")
try:
ds3 = ds2.copy("./data/testing/cp_copy_data_local_1")
except:
dsi = Dataset("./data/testing/cp_copy_data_local_1")
dsi.delete()
ds3 = ds2.copy("./data/testing/cp_copy_data_local_1")
for i in range(100):
assert ds2["num", i].compute() == 2 * i
assert ds3["num", i].compute() == 2 * i
Expand All @@ -853,8 +863,19 @@ def test_dataset_copy_gcs_local():
)
for i in range(100):
ds["num", i] = 2 * i
ds2 = ds.copy("gcs://snark-test/cp_copy_dataset_gcs_1a")
ds3 = ds2.copy("./data/testing/cp_copy_ds_local_2")
try:
ds2 = ds.copy("gcs://snark-test/cp_copy_dataset_gcs_1a")
except:
dsi = Dataset("gcs://snark-test/cp_copy_dataset_gcs_1a")
dsi.delete()
ds2 = ds.copy("gcs://snark-test/cp_copy_dataset_gcs_1a")
try:
ds3 = ds2.copy("./data/testing/cp_copy_ds_local_2")
except:
dsi = Dataset("./data/testing/cp_copy_ds_local_2")
dsi.delete()
ds3 = ds2.copy("./data/testing/cp_copy_ds_local_2")

for i in range(100):
assert ds2["num", i].compute() == 2 * i
assert ds3["num", i].compute() == 2 * i
Expand All @@ -874,11 +895,28 @@ def test_dataset_copy_azure_local():
)
for i in range(100):
ds["num", i] = 2 * i
ds2 = ds.copy("./data/testing/cp_copy_ds_local_4")
ds3 = ds2.copy(
"https://activeloop.blob.core.windows.net/activeloop-hub/cp_copy_test_ds_azure_2",
token=token,
)
try:
ds2 = ds.copy("./data/testing/cp_copy_ds_local_4")
except:
dsi = Dataset("./data/testing/cp_copy_ds_local_4")
dsi.delete()
ds2 = ds.copy("./data/testing/cp_copy_ds_local_4")

try:
ds3 = ds2.copy(
"https://activeloop.blob.core.windows.net/activeloop-hub/cp_copy_test_ds_azure_2",
token=token,
)
except:
dsi = Dataset(
"https://activeloop.blob.core.windows.net/activeloop-hub/cp_copy_test_ds_azure_2",
token=token,
)
dsi.delete()
ds3 = ds2.copy(
"https://activeloop.blob.core.windows.net/activeloop-hub/cp_copy_test_ds_azure_2",
token=token,
)
for i in range(100):
assert ds2["num", i].compute() == 2 * i
assert ds3["num", i].compute() == 2 * i
Expand All @@ -894,8 +932,20 @@ def test_dataset_copy_hub_local():
ds = Dataset("testingacc/cp_original_ds_hub_1", shape=(100,), schema=simple_schema)
for i in range(100):
ds["num", i] = 2 * i
ds2 = ds.copy("./data/testing/cp_copy_ds_local_5")
ds3 = ds2.copy("testingacc/cp_copy_dataset_testing_2")
try:
ds2 = ds.copy("./data/testing/cp_copy_ds_local_5")
except:
dsi = Dataset("./data/testing/cp_copy_ds_local_5")
dsi.delete()
ds2 = ds.copy("./data/testing/cp_copy_ds_local_5")

try:
ds3 = ds2.copy("testingacc/cp_copy_dataset_testing_2")
except:
dsi = Dataset("testingacc/cp_copy_dataset_testing_2")
dsi.delete()
ds3 = ds2.copy("testingacc/cp_copy_dataset_testing_2")

for i in range(100):
assert ds2["num", i].compute() == 2 * i
assert ds3["num", i].compute() == 2 * i
Expand All @@ -914,8 +964,20 @@ def test_dataset_copy_gcs_s3():
)
for i in range(100):
ds["num", i] = 2 * i
ds2 = ds.copy("gcs://snark-test/cp_copy_dataset_gcs_2_a")
ds3 = ds2.copy("s3://snark-test/cp_copy_ds_s3_3_a")

try:
ds2 = ds.copy("gcs://snark-test/cp_copy_dataset_gcs_2_a")
except:
dsi = Dataset("gcs://snark-test/cp_copy_dataset_gcs_2_a")
dsi.delete()
ds2 = ds.copy("gcs://snark-test/cp_copy_dataset_gcs_2_a")

try:
ds3 = ds2.copy("s3://snark-test/cp_copy_ds_s3_3_a")
except:
dsi = Dataset("s3://snark-test/cp_copy_ds_s3_3_a")
dsi.delete()
ds3 = ds2.copy("s3://snark-test/cp_copy_ds_s3_3_a")
for i in range(100):
assert ds2["num", i].compute() == 2 * i
assert ds3["num", i].compute() == 2 * i
Expand Down Expand Up @@ -1049,8 +1111,6 @@ def test_dataset_filter_3():
ds["cl", 4] = 2
ds_filtered = ds.filter(lambda x: x["cl"].compute() == 0)
assert ds_filtered.indexes == [5 * i for i in range(20)]
with pytest.raises(ValueError):
ds_filtered["img"].compute()
ds_filtered_2 = ds.filter(lambda x: x["cl"].compute() == 2)
assert (ds_filtered_2["img"].compute() == 4 * np.ones((1, 5, 6, 3))).all()
for item in ds_filtered_2:
Expand Down
1 change: 1 addition & 0 deletions hub/compute/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ def upload(self, results, ds: Dataset, token: dict, progressbar: bool = True):
1 + int(num_chunks) if num_chunks != int(num_chunks) else num_chunks
)
length = num_chunks * chunk if self.workers != 1 else len(value)
length = int(length)
batched_values = (
batchify(value, length, length + ((chunk - (offset % chunk))) % chunk)
if length != len(value)
Expand Down
2 changes: 1 addition & 1 deletion hub/store/tests/test_dynamic_tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def test_dynamic_tensor_2():
t[0, 6] = 2 * np.ones((20,), dtype="int32")
assert t[0, 5, :].tolist() == [1] * 10 + [0] * 10
assert t.get_shape(0).tolist() == [10, 20]
assert t.get_shape(slice(0, 1)).tolist() == [1, 10, 20]
assert t.get_shape(slice(0, 1)).tolist() == [[10, 20]]


def test_dynamic_tensor_3():
Expand Down

0 comments on commit afd7bf5

Please sign in to comment.