From 6fcb35bad43901e9803ea90163b2171d71729ec0 Mon Sep 17 00:00:00 2001 From: FayazRahman Date: Thu, 3 Aug 2023 14:58:30 +0530 Subject: [PATCH 1/2] speedup --- deeplake/util/transform.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/deeplake/util/transform.py b/deeplake/util/transform.py index 69b68ba2a1..4056b8273b 100644 --- a/deeplake/util/transform.py +++ b/deeplake/util/transform.py @@ -202,6 +202,9 @@ def _transform_and_append_data_slice( pipeline_checked = False + last_pg_update_time = time.time() + progress = 0 + for i, sample in enumerate( (data_slice[i : i + 1] for i in range(n)) if pd and isinstance(data_slice, pd.DataFrame) @@ -237,7 +240,13 @@ def _transform_and_append_data_slice( skipped_samples_in_current_batch = 0 if pg_callback is not None: - pg_callback(1) + progress += 1 + if ( + time.time() - last_pg_update_time + > TRANSFORM_PROGRESSBAR_UPDATE_INTERVAL + or i == n - 1 + ): + pg_callback(progress) # failure at chunk_engine # retry one sample at a time From 5e38cbb30c2496eeee43f3a4986af6c617850355 Mon Sep 17 00:00:00 2001 From: FayazRahman Date: Thu, 3 Aug 2023 15:01:39 +0530 Subject: [PATCH 2/2] fix --- deeplake/util/transform.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/deeplake/util/transform.py b/deeplake/util/transform.py index 4056b8273b..7af518e847 100644 --- a/deeplake/util/transform.py +++ b/deeplake/util/transform.py @@ -247,6 +247,8 @@ def _transform_and_append_data_slice( or i == n - 1 ): pg_callback(progress) + progress = 0 + last_pg_update_time = time.time() # failure at chunk_engine # retry one sample at a time