Fix pytorch async dataloader race condition #3120

Status: Merged (1 commit, Aug 19, 2021)
19 changes: 7 additions & 12 deletions horovod/data/data_loader_base.py
@@ -71,24 +71,19 @@ def __init__(self, async_loader_queue_size=64, *args, **kwargs):
         self.thread.daemon = True
         self.started = False
 
-    def __del__(self):
-        self._close_async_loader()
-        s = super()
-        if hasattr(s, "__del__"):
-            s.__del__(self)
-
-    def _close_async_loader(self):
+    def close_async_loader(self):
         """
         Close the async data loader.
         """
         print("Closing the AsyncDataLoaderMixin.")
         if self.async_loader_queue_size > 0 and self.started:
             self.finished_event.set()
-            try:
-                # Free buffer to allow worker to retry
-                self.queue.get_nowait()
-            except Empty:
-                pass
+            while True:
+                try:
+                    # Drain buffer
+                    self.queue.get_nowait()
+                except Empty:
+                    break
             self.thread.join()
 
     def _async_worker(self):
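The race this change addresses: the async worker thread can be blocked on a `put()` against the bounded queue while the consumer shuts down, and freeing a single slot with one `get_nowait()` may not be enough if the worker still has items (or a trailing sentinel) to enqueue before it can exit, so `thread.join()` can hang. Draining the queue in a loop guarantees the worker unblocks, observes `finished_event`, and finishes. Dropping `__del__` and exposing `close_async_loader()` publicly also lets the owner close the loader at a well-defined point instead of relying on garbage collection. Below is a minimal standalone sketch of the drain-before-join pattern; the names are simplified and the trailing `None` sentinel is an illustrative assumption, not the actual Horovod mixin:

```python
import threading
from queue import Empty, Queue


class AsyncLoaderSketch:
    """Toy loader demonstrating the drain-before-join shutdown pattern."""

    def __init__(self, batches, queue_size=64):
        self.queue = Queue(queue_size)      # bounded buffer (cf. async_loader_queue_size)
        self.finished_event = threading.Event()
        self.batches = batches
        self.started = True
        self.thread = threading.Thread(target=self._async_worker, daemon=True)
        self.thread.start()

    def _async_worker(self):
        for batch in self.batches:
            if self.finished_event.is_set():
                break
            self.queue.put(batch)           # blocks while the buffer is full
        self.queue.put(None)                # end-of-stream sentinel; can also block

    def close_async_loader(self):
        if self.started:
            self.finished_event.set()
            while True:
                try:
                    # Drain the whole buffer so any blocked put() (a batch or
                    # the sentinel) can complete and the worker can exit.
                    self.queue.get_nowait()
                except Empty:
                    break
            self.thread.join()
```

With the old single `get_nowait()`, `AsyncLoaderSketch(range(1000), queue_size=4).close_async_loader()` can still hang in `thread.join()` because the one freed slot is immediately reused; with the drain loop, shutdown completes.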
13 changes: 10 additions & 3 deletions horovod/spark/lightning/datamodule.py
@@ -65,9 +65,14 @@ def setup(self, stage=None):
     def teardown(self, stage=None):
         if stage == "fit" or stage is None:
             if self.verbose:
-                print("Tear down petastorm readers")
+                print("Tear down: closing async dataloaders")
+            self.train_dl.close_async_loader()
+            if self.has_val:
+                self.val_dl.close_async_loader()
             if not self.inmemory_cache_all:
                 # Reader was loaded once and stopped for inmemory dataloader.
+                if self.verbose:
+                    print("Tear down: closing petastorm readers")
                 self.train_reader.stop()
                 self.train_reader.join()
                 if self.has_val:
@@ -90,7 +95,8 @@ def train_dataloader(self):
             dataloader_class = PytorchInfiniteAsyncDataLoader
             kwargs['shuffling_queue_capacity'] = self.shuffle_size
 
-        return dataloader_class(**kwargs)
+        self.train_dl = dataloader_class(**kwargs)
+        return self.train_dl
 
     def val_dataloader(self):
         if not self.has_val:
@@ -110,4 +116,5 @@ def val_dataloader(self):
             dataloader_class = PytorchInfiniteAsyncDataLoader
             kwargs['shuffling_queue_capacity'] = 0
 
-        return dataloader_class(**kwargs)
+        self.val_dl = dataloader_class(**kwargs)
+        return self.val_dl
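On the datamodule side, the loaders returned by train_dataloader() and val_dataloader() are now kept on self so that teardown() can call close_async_loader() on the exact objects Lightning consumed, before the petastorm readers are stopped. A condensed sketch of that wiring, with illustrative names rather than the full Horovod DataModule:

```python
class DataModuleSketch:
    """Illustrative wiring: keep loader references so teardown can close them."""

    def __init__(self, make_train_loader, make_val_loader=None, verbose=False):
        self.make_train_loader = make_train_loader
        self.make_val_loader = make_val_loader
        self.has_val = make_val_loader is not None
        self.verbose = verbose
        self.train_dl = None
        self.val_dl = None

    def train_dataloader(self):
        self.train_dl = self.make_train_loader()   # keep the reference
        return self.train_dl

    def val_dataloader(self):
        self.val_dl = self.make_val_loader()
        return self.val_dl

    def teardown(self, stage=None):
        if stage == "fit" or stage is None:
            if self.verbose:
                print("Tear down: closing async dataloaders")
            # Close the loader worker threads first; the underlying readers
            # (omitted in this sketch) can then be stopped and joined safely.
            self.train_dl.close_async_loader()
            if self.has_val:
                self.val_dl.close_async_loader()
```

Paired with the AsyncLoaderSketch above, `dm = DataModuleSketch(lambda: AsyncLoaderSketch(range(10)))`, `dm.train_dataloader()`, `dm.teardown('fit')` exercises the full create/close path.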