Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

change progress bar backend to tqdm #2333

Merged
merged 68 commits into from Aug 21, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
68 commits
Select commit Hold shift + click to select a range
1511fd6
add preliminary tqdm work
casperdcl Jul 20, 2019
0ebce9c
create dvc.progress.Tqdm, migrate dvc.remote.base
casperdcl Jul 21, 2019
70ee313
disable old progress
casperdcl Jul 27, 2019
c4b9d7d
minor bugfix
casperdcl Jul 27, 2019
cc28155
tqdm repo.checkout
casperdcl Jul 28, 2019
62cbeed
tqdm remote.http
casperdcl Jul 28, 2019
6aee6b6
add progress.Tqdm(bytes)
casperdcl Jul 28, 2019
3f0b85c
add progress.Tqdm(truncate)
casperdcl Jul 28, 2019
d460d7e
tqdm remote.s3, flake8
casperdcl Jul 28, 2019
f61e7d8
tqdm remote.azure
casperdcl Jul 28, 2019
85d4f7e
add progress.Tqdm(desc_truncate)
casperdcl Jul 28, 2019
bac3d93
tqdm remote.ssh.connection
casperdcl Jul 28, 2019
efd8a07
tqdm remote.local
casperdcl Jul 28, 2019
82bb550
tqdm dvc.utils
casperdcl Jul 28, 2019
d6f816b
remove old progress
casperdcl Jul 28, 2019
c02807d
tqdm remote.oss
casperdcl Jul 28, 2019
ca68eb4
add progress.Tqdm.update_desc(truncate=True)
casperdcl Jul 28, 2019
aed3451
pylint
casperdcl Jul 28, 2019
9143f67
black
casperdcl Jul 28, 2019
f88bf92
tqdm auto-disabling with dynamic log level
casperdcl Aug 10, 2019
56db2ee
misc minor restructuring
casperdcl Aug 10, 2019
2ac99d8
update dvc.progress test stub
casperdcl Aug 10, 2019
1925d63
fix silly error
casperdcl Aug 10, 2019
2f42db0
fix base progress_callback
casperdcl Aug 10, 2019
d0315a3
minor black, test fixes
casperdcl Aug 10, 2019
ee4248d
fix and update more tests
casperdcl Aug 11, 2019
120dfff
naive test update
casperdcl Aug 11, 2019
ea8f4ba
slight test update
casperdcl Aug 11, 2019
7665782
fix more callback usage
casperdcl Aug 11, 2019
7328ff4
update tqdm requirement
casperdcl Aug 11, 2019
0de87af
progress-aware logging
casperdcl Aug 11, 2019
56d1cca
log to correct stream
casperdcl Aug 11, 2019
032d507
persistent progress for large http files
casperdcl Aug 11, 2019
1d61745
ensure nested bars are cleared away
casperdcl Aug 11, 2019
f834e72
fix flake8
casperdcl Aug 11, 2019
fbaadcb
fix silly error
casperdcl Aug 12, 2019
f373655
minor potential kwargs bugfix
casperdcl Aug 13, 2019
838151d
tidy & avoid excessive requests
casperdcl Aug 13, 2019
3811141
minor tidy
casperdcl Aug 13, 2019
3f8caec
Merge remote-tracking branch 'upstream/master' into tqdm
casperdcl Aug 13, 2019
a1f5c03
tidy some ThreadPoolExecutor
casperdcl Aug 13, 2019
d9b4ecf
better auto-closing TqdmThreadPoolExecutor
casperdcl Aug 13, 2019
c6f73fd
flake8
casperdcl Aug 13, 2019
f88c2fe
remove old test
casperdcl Aug 13, 2019
186f0cc
a little tidy
casperdcl Aug 13, 2019
39da372
fix silly request header error
casperdcl Aug 13, 2019
8955a60
add .mailmap
casperdcl Aug 14, 2019
b018933
fix py2 MagicMock(sys.stderr).encoding in tests
casperdcl Aug 16, 2019
adf05e6
minor rename
casperdcl Aug 16, 2019
60566b3
minor test update
casperdcl Aug 16, 2019
534d6fd
remove defunct test, extraneous comments
casperdcl Aug 17, 2019
ac4d03e
remove auto-persistent bars
casperdcl Aug 17, 2019
093ad20
flake8 unused imports
casperdcl Aug 17, 2019
3098460
misc minor style fixes & optimisations
casperdcl Aug 17, 2019
16113c8
update to bleeding-edge StreamHandler.emit
casperdcl Aug 17, 2019
49d4930
add Tqdm units
casperdcl Aug 17, 2019
bd1fe67
probably correct oss2 Tqdm units (sparse Mandarin documentation)
casperdcl Aug 17, 2019
34dc163
Tqdm auto leave=False for nested bars
casperdcl Aug 17, 2019
9a2dea8
Tqdm longer descriptions
casperdcl Aug 17, 2019
f90fd17
update tqdm version
casperdcl Aug 18, 2019
13e76de
suppress MD5 description prefix
casperdcl Aug 18, 2019
6102caa
hopefully temp fix for uncalled checkout progress_callback
casperdcl Aug 18, 2019
1c8bf11
Merge remote-tracking branch 'upstream/master' into tqdm
casperdcl Aug 19, 2019
bee9064
fix uncalled `progress_callback`s
casperdcl Aug 19, 2019
8f7a7a5
fix logic
casperdcl Aug 19, 2019
9b73ff7
avoid duplication
casperdcl Aug 19, 2019
8faa02e
quick review fixes
casperdcl Aug 20, 2019
4bd2ce2
get_files_number test fix
casperdcl Aug 20, 2019
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -23,6 +23,7 @@ innosetup/config.ini
*.exe

.coverage
.coverage.*

*.swp

Expand Down
6 changes: 6 additions & 0 deletions .mailmap
@@ -0,0 +1,6 @@
Paweł Redzyński <pawelredzynski@gmail.com>
efiop marked this conversation as resolved.
Show resolved Hide resolved
Dmitry Petrov <dmitry.petrov@nevesomo.com>
Earl Hathaway <github@earlh.com>
Nabanita Dash <dashnabanita@gmail.com>
Kurian Benoy <kurian.bkk@gmail.com>
Sritanu Chakraborty <sritanu25@gmail.com>
25 changes: 14 additions & 11 deletions dvc/logger.py
Expand Up @@ -3,6 +3,7 @@
from __future__ import unicode_literals

from dvc.utils.compat import str, StringIO
from dvc.progress import Tqdm

import logging
import logging.handlers
Expand Down Expand Up @@ -53,9 +54,6 @@ class ColorFormatter(logging.Formatter):
)

def format(self, record):
if self._is_visible(record):
self._progress_aware()

if record.levelname == "INFO":
return record.msg

Expand Down Expand Up @@ -146,20 +144,25 @@ def _parse_exc(self, exc_info):

return (exception, stack_trace)

def _progress_aware(self):
efiop marked this conversation as resolved.
Show resolved Hide resolved
"""Add a new line if progress bar hasn't finished"""
from dvc.progress import progress

if not progress.is_finished:
progress._print()
progress.clearln()


class LoggerHandler(logging.StreamHandler):
def handleError(self, record):
super(LoggerHandler, self).handleError(record)
raise LoggingException(record)

def emit(self, record):
"""Write to Tqdm's stream so as to not break progressbars"""
try:
msg = self.format(record)
Tqdm.write(
msg, file=self.stream, end=getattr(self, "terminator", "\n")
)
self.flush()
except RecursionError:
raise
except Exception:
self.handleError(record)


def setup(level=logging.INFO):
colorama.init()
Expand Down
8 changes: 3 additions & 5 deletions dvc/output/base.py
Expand Up @@ -277,6 +277,7 @@ def download(self, to):

def checkout(self, force=False, progress_callback=None, tag=None):
if not self.use_cache:
progress_callback(str(self.path_info), self.get_files_number())
return

if tag:
Expand Down Expand Up @@ -313,13 +314,10 @@ def move(self, out):
self.repo.scm.ignore(self.fspath)

def get_files_number(self):
if not self.use_cache or not self.checksum:
if not self.use_cache:
return 0

if self.is_dir_checksum:
return len(self.dir_cache)

return 1
return self.cache.get_files_number(self.checksum)

def unprotect(self):
if self.exists:
Expand Down
222 changes: 81 additions & 141 deletions dvc/progress.py
@@ -1,154 +1,94 @@
"""Manages progress bars for dvc repo."""

from __future__ import print_function
from __future__ import unicode_literals

from dvc.utils.compat import str

import sys
import threading
import logging
from tqdm import tqdm
from copy import deepcopy
from concurrent.futures import ThreadPoolExecutor

CLEARLINE_PATTERN = "\r\x1b[K"


class Progress(object):
class TqdmThreadPoolExecutor(ThreadPoolExecutor):
"""
Simple multi-target progress bar.
Ensure worker progressbars are cleared away properly.
"""

def __init__(self):
self._n_total = 0
self._n_finished = 0
self._lock = threading.Lock()
self._line = None

def set_n_total(self, total):
"""Sets total number of targets."""
self._n_total = total
self._n_finished = 0

@property
def is_finished(self):
"""Returns if all targets have finished."""
return self._n_total == self._n_finished

def clearln(self):
self._print(CLEARLINE_PATTERN, end="")

def _writeln(self, line):
self.clearln()
self._print(line, end="")
sys.stdout.flush()

def reset(self):
with self._lock:
self._n_total = 0
self._n_finished = 0
self._line = None

def refresh(self, line=None):
"""Refreshes progress bar."""
# Just go away if it is locked. Will update next time
if not self._lock.acquire(False):
return

if line is None:
line = self._line

if sys.stdout.isatty() and line is not None:
self._writeln(line)
self._line = line

self._lock.release()

def update_target(self, name, current, total):
"""Updates progress bar for a specified target."""
self.refresh(self._bar(name, current, total))

def finish_target(self, name):
"""Finishes progress bar for a specified target."""
# We have to write a msg about finished target
with self._lock:
pbar = self._bar(name, 100, 100)

if sys.stdout.isatty():
self.clearln()

self._print(pbar)

self._n_finished += 1
self._line = None

def _bar(self, target_name, current, total):
def __enter__(self):
"""
Make a progress bar out of info, which looks like:
(1/2): [########################################] 100% master.zip
Creates a blank initial dummy progress bar if needed so that workers
are forced to create "nested" bars.
"""
bar_len = 30

if total is None:
state = 0
percent = "?% "
else:
total = int(total)
state = int((100 * current) / total) if current < total else 100
percent = str(state) + "% "

if self._n_total > 1:
num = "({}/{}): ".format(self._n_finished + 1, self._n_total)
else:
num = ""
blank_bar = Tqdm(bar_format="Multi-Threaded:", leave=False)
if blank_bar.pos > 0:
# already nested - don't need a placeholder bar
blank_bar.close()
self.bar = blank_bar
return super(TqdmThreadPoolExecutor, self).__enter__()

n_sh = int((state * bar_len) / 100)
n_sp = bar_len - n_sh
pbar = "[" + "#" * n_sh + " " * n_sp + "] "
def __exit__(self, *a, **k):
super(TqdmThreadPoolExecutor, self).__exit__(*a, **k)
self.bar.close()

return num + pbar + percent + target_name

@staticmethod
def _print(*args, **kwargs):
import logging

logger = logging.getLogger(__name__)

if logger.getEffectiveLevel() == logging.CRITICAL:
return

print(*args, **kwargs)

def __enter__(self):
self._lock.acquire(True)
if self._line is not None:
self.clearln()

def __exit__(self, typ, value, tbck):
if self._line is not None:
self.refresh()
self._lock.release()

def __call__(self, seq, name="", total=None):
if total is None:
total = len(seq)

self.update_target(name, 0, total)
for done, item in enumerate(seq, start=1):
yield item
self.update_target(name, done, total)
self.finish_target(name)


class ProgressCallback(object):
def __init__(self, total):
self.total = total
self.current = 0
progress.reset()

def update(self, name, progress_to_add=1):
self.current += progress_to_add
progress.update_target(name, self.current, self.total)

def finish(self, name):
progress.finish_target(name)

class Tqdm(tqdm):
"""
maximum-compatibility tqdm-based progressbars
"""

progress = Progress() # pylint: disable=invalid-name
def __init__(
self,
iterable=None,
disable=None,
bytes=False, # pylint: disable=W0622
desc_truncate=None,
leave=None,
**kwargs
):
"""
bytes : shortcut for
`unit='B', unit_scale=True, unit_divisor=1024, miniters=1`
desc_truncate : like `desc` but will truncate to 10 chars
kwargs : anything accepted by `tqdm.tqdm()`
"""
kwargs = deepcopy(kwargs)
if bytes:
for k, v in dict(
unit="B", unit_scale=True, unit_divisor=1024, miniters=1
).items():
kwargs.setdefault(k, v)
if desc_truncate is not None:
kwargs.setdefault("desc", self.truncate(desc_truncate))
if disable is None:
disable = (
logging.getLogger(__name__).getEffectiveLevel()
>= logging.CRITICAL
)
super(Tqdm, self).__init__(
iterable=iterable, disable=disable, leave=leave, **kwargs
)

def update_desc(self, desc, n=1, truncate=True):
"""
Calls `set_description(truncate(desc))` and `update(n)`
"""
self.set_description(
self.truncate(desc) if truncate else desc, refresh=False
)
self.update(n)

def update_to(self, current, total=None):
if total:
self.total = total # pylint: disable=W0613,W0201
self.update(current - self.n)

@classmethod
def truncate(cls, s, max_len=25, end=True, fill="..."):
"""
Guarantee len(output) < max_lenself.
>>> truncate("hello", 4)
'...o'
"""
if len(s) <= max_len:
return s
if len(fill) > max_len:
return fill[-max_len:] if end else fill[:max_len]
i = max_len - len(fill)
return (fill + s[-i:]) if end else (s[:i] + fill)
36 changes: 19 additions & 17 deletions dvc/remote/azure.py
Expand Up @@ -17,7 +17,7 @@
BlockBlobService = None

from dvc.utils.compat import urlparse
from dvc.progress import progress
from dvc.progress import Tqdm
from dvc.config import Config
from dvc.remote.base import RemoteBASE
from dvc.path_info import CloudURLInfo
Expand All @@ -26,14 +26,6 @@
logger = logging.getLogger(__name__)


class Callback(object):
def __init__(self, name):
self.name = name

def __call__(self, current, total):
progress.update_target(self.name, current, total)


class RemoteAZURE(RemoteBASE):
scheme = Schemes.AZURE
path_cls = CloudURLInfo
Expand Down Expand Up @@ -123,18 +115,28 @@ def list_cache_paths(self):
def _upload(
self, from_file, to_info, name=None, no_progress_bar=False, **_kwargs
):
cb = None if no_progress_bar else Callback(name)
self.blob_service.create_blob_from_path(
to_info.bucket, to_info.path, from_file, progress_callback=cb
)
with Tqdm(
desc_truncate=name, disable=no_progress_bar, bytes=True
) as pbar:
self.blob_service.create_blob_from_path(
to_info.bucket,
to_info.path,
from_file,
progress_callback=pbar.update_to,
)

def _download(
self, from_info, to_file, name=None, no_progress_bar=False, **_kwargs
):
cb = None if no_progress_bar else Callback(name)
self.blob_service.get_blob_to_path(
from_info.bucket, from_info.path, to_file, progress_callback=cb
)
with Tqdm(
desc_truncate=name, disable=no_progress_bar, bytes=True
) as pbar:
self.blob_service.get_blob_to_path(
from_info.bucket,
from_info.path,
to_file,
progress_callback=pbar.update_to,
)

def exists(self, path_info):
paths = self._list_paths(path_info.bucket, path_info.path)
Expand Down