From 8f484f6d0abf56e50d5ac43fdbd7f3eb85db695a Mon Sep 17 00:00:00 2001
From: Fariz Rahman <farizrahman4u@gmail.com>
Date: Sat, 10 Jul 2021 22:11:37 +0530
Subject: [PATCH 1/4] fix tensor create

---
 hub/api/dataset.py            |  8 ++++----
 hub/api/tests/test_api.py     | 11 +++++++++++
 hub/core/storage/lru_cache.py |  8 +++++---
 hub/core/storage/provider.py  |  4 ++--
 hub/core/tensor.py            |  1 +
 5 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/hub/api/dataset.py b/hub/api/dataset.py
index 4ec6afcaa4..16a13e54a8 100644
--- a/hub/api/dataset.py
+++ b/hub/api/dataset.py
@@ -1,6 +1,6 @@
 from hub.core.tensor import create_tensor
 from hub.constants import DEFAULT_HTYPE
-from typing import Callable, Dict, Optional, Union, Tuple, List
+from typing import Callable, Dict, Optional, Union, Tuple, List, Sequence
 import numpy as np
 
 from hub.api.tensor import Tensor
@@ -196,7 +196,7 @@ def create_tensor(
 
         self.tensors[name] = tensor
         self.meta.tensors.append(name)
-
+        self.flush([get_dataset_meta_key()])
         return tensor
 
     __getattr__ = __getitem__
@@ -311,13 +311,13 @@ def tensorflow(self):
         """
         return dataset_to_tensorflow(self)
 
-    def flush(self):
+    def flush(self, keys: Optional[Sequence[str]] = None):
         """Necessary operation after writes if caches are being used.
         Writes all the dirty data from the cache layers (if any) to the underlying storage.
         Here dirty data corresponds to data that has been changed/assigned and but hasn't yet been sent to the
         underlying storage.
         """
-        self.storage.flush()
+        self.storage.flush(keys)
 
     def clear_cache(self):
         """Flushes (see Dataset.flush documentation) the contents of the cache layers (if any) and then deletes contents
diff --git a/hub/api/tests/test_api.py b/hub/api/tests/test_api.py
index 5e7e9399e0..6530362969 100644
--- a/hub/api/tests/test_api.py
+++ b/hub/api/tests/test_api.py
@@ -10,6 +10,7 @@
 from hub.util.exceptions import TensorDtypeMismatchError, TensorInvalidSampleShapeError
 from hub.client.client import HubBackendClient
 from hub.client.utils import has_hub_testing_creds
+from click.testing import CliRunner
 
 
 # need this for 32-bit and 64-bit systems to have correct tests
@@ -464,3 +465,13 @@ def test_hub_cloud_dataset():
         np.testing.assert_array_equal(ds.image[i].numpy(), i * np.ones((100, 100)))
 
     ds.delete()
+
+
+def test_empty_dataset():  # tensors created with no samples must be listed on reopen
+    with CliRunner().isolated_filesystem():  # click helper; presumably a temp cwd - confirm
+        ds = Dataset("test")
+        ds.create_tensor("x")  # no samples appended; flush() is never called explicitly
+        ds.create_tensor("y")
+        ds.create_tensor("z")
+        ds = Dataset("test")  # second Dataset object constructed on the same path
+        assert list(ds.tensors) == ["x", "y", "z"]  # names expected in creation order
diff --git a/hub/core/storage/lru_cache.py b/hub/core/storage/lru_cache.py
index b957f62911..95979f24db 100644
--- a/hub/core/storage/lru_cache.py
+++ b/hub/core/storage/lru_cache.py
@@ -1,6 +1,6 @@
 from collections import OrderedDict
 from hub.core.storage.cachable import Cachable
-from typing import Callable, Set, Union
+from typing import Callable, Set, Union, Optional, Sequence
 
 from hub.core.storage.provider import StorageProvider
 
@@ -37,12 +37,14 @@ def __init__(
         self.dirty_keys: Set[str] = set()  # keys present in cache but not next_storage
         self.cache_used = 0
 
-    def flush(self):
+    def flush(self, keys: Optional[Sequence[str]] = None):
         """Writes data from cache_storage to next_storage. Only the dirty keys are written.
         This is a cascading function and leads to data being written to the final storage in case of a chained cache.
         """
+        if not keys:
+            keys = self.dirty_keys.copy()  # type: ignore
         self.check_readonly()
-        for key in self.dirty_keys.copy():
+        for key in keys:  # type: ignore
             self._forward(key)
         self.next_storage.flush()
 
diff --git a/hub/core/storage/provider.py b/hub/core/storage/provider.py
index 9576be33ad..cdbaf9ddba 100644
--- a/hub/core/storage/provider.py
+++ b/hub/core/storage/provider.py
@@ -1,7 +1,7 @@
 from abc import ABC, abstractmethod
 from collections.abc import MutableMapping
 from hub.core.storage.cachable import Cachable
-from typing import Optional
+from typing import Optional, Sequence
 
 from hub.constants import BYTE_PADDING
 from hub.util.assert_byte_indexes import assert_byte_indexes
@@ -140,7 +140,7 @@ def check_readonly(self):
         if hasattr(self, "read_only") and self.read_only:
             raise ReadOnlyModeError()
 
-    def flush(self):
+    def flush(self, keys: Optional[Sequence[str]] = None):
         """Only needs to be implemented for caches. Flushes the data to the next storage provider.
         Should be a no op for Base Storage Providers like local, s3, azure, gcs, etc.
         """
diff --git a/hub/core/tensor.py b/hub/core/tensor.py
index bc5858c567..75884253d9 100644
--- a/hub/core/tensor.py
+++ b/hub/core/tensor.py
@@ -43,3 +43,4 @@ def create_tensor(
         **kwargs,
     )
     storage[meta_key] = meta  # type: ignore
+    storage.flush()

From dbfbb58680d89239fc0ac2012b67dbab3caf4955 Mon Sep 17 00:00:00 2001
From: Fariz Rahman <farizrahman4u@gmail.com>
Date: Sat, 10 Jul 2021 22:13:53 +0530
Subject: [PATCH 2/4] remove unconditional storage flush from create_tensor

---
 hub/core/tensor.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/hub/core/tensor.py b/hub/core/tensor.py
index 75884253d9..bc5858c567 100644
--- a/hub/core/tensor.py
+++ b/hub/core/tensor.py
@@ -43,4 +43,3 @@ def create_tensor(
         **kwargs,
     )
     storage[meta_key] = meta  # type: ignore
-    storage.flush()

From 60f30ff7483c31efb44e99b1769641826b78ef3e Mon Sep 17 00:00:00 2001
From: Fariz Rahman <farizrahman4u@gmail.com>
Date: Sat, 10 Jul 2021 22:30:27 +0530
Subject: [PATCH 3/4] flush only the tensor meta key in create_tensor

---
 hub/core/tensor.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hub/core/tensor.py b/hub/core/tensor.py
index bc5858c567..e82bd0531a 100644
--- a/hub/core/tensor.py
+++ b/hub/core/tensor.py
@@ -43,3 +43,4 @@ def create_tensor(
         **kwargs,
     )
     storage[meta_key] = meta  # type: ignore
+    storage.flush([meta_key])

From ba446b01dd15bd42b1b0bff027e539094684e2bd Mon Sep 17 00:00:00 2001
From: Fariz Rahman <farizrahman4u@gmail.com>
Date: Mon, 12 Jul 2021 15:47:02 +0530
Subject: [PATCH 4/4] simplify

---
 hub/api/dataset.py            | 10 +++++-----
 hub/core/storage/lru_cache.py |  8 +++-----
 hub/core/storage/provider.py  |  4 ++--
 hub/core/tensor.py            |  1 -
 4 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/hub/api/dataset.py b/hub/api/dataset.py
index 16a13e54a8..dabd61e6ee 100644
--- a/hub/api/dataset.py
+++ b/hub/api/dataset.py
@@ -1,6 +1,6 @@
 from hub.core.tensor import create_tensor
 from hub.constants import DEFAULT_HTYPE
-from typing import Callable, Dict, Optional, Union, Tuple, List, Sequence
+from typing import Callable, Dict, Optional, Union, Tuple, List
 import numpy as np
 
 from hub.api.tensor import Tensor
@@ -182,6 +182,7 @@ def create_tensor(
         if tensor_exists(name, self.storage):
             raise TensorAlreadyExistsError(name)
 
+        self.meta.tensors.append(name)
         create_tensor(
             name,
             self.storage,
@@ -195,8 +196,7 @@ def create_tensor(
         tensor = Tensor(name, self.storage)  # type: ignore
 
         self.tensors[name] = tensor
-        self.meta.tensors.append(name)
-        self.flush([get_dataset_meta_key()])
+
         return tensor
 
     __getattr__ = __getitem__
@@ -311,13 +311,13 @@ def tensorflow(self):
         """
         return dataset_to_tensorflow(self)
 
-    def flush(self, keys: Optional[Sequence[str]] = None):
+    def flush(self):
         """Necessary operation after writes if caches are being used.
         Writes all the dirty data from the cache layers (if any) to the underlying storage.
         Here dirty data corresponds to data that has been changed/assigned and but hasn't yet been sent to the
         underlying storage.
         """
-        self.storage.flush(keys)
+        self.storage.flush()
 
     def clear_cache(self):
         """Flushes (see Dataset.flush documentation) the contents of the cache layers (if any) and then deletes contents
diff --git a/hub/core/storage/lru_cache.py b/hub/core/storage/lru_cache.py
index 95979f24db..b957f62911 100644
--- a/hub/core/storage/lru_cache.py
+++ b/hub/core/storage/lru_cache.py
@@ -1,6 +1,6 @@
 from collections import OrderedDict
 from hub.core.storage.cachable import Cachable
-from typing import Callable, Set, Union, Optional, Sequence
+from typing import Callable, Set, Union
 
 from hub.core.storage.provider import StorageProvider
 
@@ -37,14 +37,12 @@ def __init__(
         self.dirty_keys: Set[str] = set()  # keys present in cache but not next_storage
         self.cache_used = 0
 
-    def flush(self, keys: Optional[Sequence[str]] = None):
+    def flush(self):
         """Writes data from cache_storage to next_storage. Only the dirty keys are written.
         This is a cascading function and leads to data being written to the final storage in case of a chained cache.
         """
-        if not keys:
-            keys = self.dirty_keys.copy()  # type: ignore
         self.check_readonly()
-        for key in keys:  # type: ignore
+        for key in self.dirty_keys.copy():
             self._forward(key)
         self.next_storage.flush()
 
diff --git a/hub/core/storage/provider.py b/hub/core/storage/provider.py
index cdbaf9ddba..9576be33ad 100644
--- a/hub/core/storage/provider.py
+++ b/hub/core/storage/provider.py
@@ -1,7 +1,7 @@
 from abc import ABC, abstractmethod
 from collections.abc import MutableMapping
 from hub.core.storage.cachable import Cachable
-from typing import Optional, Sequence
+from typing import Optional
 
 from hub.constants import BYTE_PADDING
 from hub.util.assert_byte_indexes import assert_byte_indexes
@@ -140,7 +140,7 @@ def check_readonly(self):
         if hasattr(self, "read_only") and self.read_only:
             raise ReadOnlyModeError()
 
-    def flush(self, keys: Optional[Sequence[str]] = None):
+    def flush(self):
         """Only needs to be implemented for caches. Flushes the data to the next storage provider.
         Should be a no op for Base Storage Providers like local, s3, azure, gcs, etc.
         """
diff --git a/hub/core/tensor.py b/hub/core/tensor.py
index e82bd0531a..bc5858c567 100644
--- a/hub/core/tensor.py
+++ b/hub/core/tensor.py
@@ -43,4 +43,3 @@ def create_tensor(
         **kwargs,
     )
     storage[meta_key] = meta  # type: ignore
-    storage.flush([meta_key])