|
9 | 9 | from contextlib import contextmanager
|
10 | 10 | from glob import has_magic
|
11 | 11 |
|
| 12 | +from .callbacks import as_callback, branch |
12 | 13 | from .exceptions import FSTimeoutError
|
13 | 14 | from .spec import AbstractFileSystem
|
14 | 15 | from .utils import PY36, is_exception, other_paths
|
@@ -154,26 +155,29 @@ def _get_batch_size():
|
154 | 155 | return soft_limit // 8
|
155 | 156 |
|
156 | 157 |
|
157 |
async def _run_coros_in_chunks(coros, batch_size=None, callback=None, timeout=None):
    """Run the given coroutines in smaller chunks so as not to cross the
    open file-descriptor limit.

    Parameters
    ----------
    coros : sequence of coroutine objects
        The work to execute.
    batch_size : int, optional
        Maximum number of coroutines awaited concurrently. ``-1`` disables
        throttling (everything runs as one batch). ``None`` infers the limit
        from process resources (soft fd limit divided by 8, falling back to
        128 where the system does not support querying it).
    callback : fsspec callback, optional
        Normalised via ``as_callback``; ``relative_update(1)`` is emitted as
        each coroutine finishes.
    timeout : number, optional
        Passed to ``asyncio.as_completed`` — applies per batch, and raises
        ``asyncio.TimeoutError`` if a batch does not finish in time.

    Returns
    -------
    list
        Results in *completion* order within each batch, which is not
        necessarily the submission order of ``coros``.
    """
    callback = as_callback(callback)
    if not coros:
        # Nothing to do. Also avoids batch_size == len(coros) == 0 below,
        # which would trip the assert when throttling is disabled (-1).
        return []
    if batch_size is None:
        batch_size = _get_batch_size()

    if batch_size == -1:
        batch_size = len(coros)

    assert batch_size > 0

    results = []
    for start in range(0, len(coros), batch_size):
        chunk = coros[start : start + batch_size]
        for coro in asyncio.as_completed(chunk, timeout=timeout):
            results.append(await coro)
            callback.call("relative_update", 1)
    return results
|
178 | 182 |
|
179 | 183 |
|
@@ -340,13 +344,16 @@ async def _put(self, lpath, rpath, recursive=False, **kwargs):
|
340 | 344 | fs = LocalFileSystem()
|
341 | 345 | lpaths = fs.expand_path(lpath, recursive=recursive)
|
342 | 346 | rpaths = other_paths(lpaths, rpath)
|
| 347 | + callback = as_callback(kwargs.pop("callback", None)) |
343 | 348 | batch_size = kwargs.pop("batch_size", self.batch_size)
|
344 |
| - return await _throttled_gather( |
345 |
| - [ |
346 |
| - self._put_file(lpath, rpath, **kwargs) |
347 |
| - for lpath, rpath in zip(lpaths, rpaths) |
348 |
| - ], |
349 |
| - batch_size=batch_size, |
| 349 | + |
| 350 | + coros = [] |
| 351 | + callback.lazy_call("set_size", len, lpaths) |
| 352 | + for lpath, rpath in zip(lpaths, rpaths): |
| 353 | + branch(callback, lpath, rpath, kwargs) |
| 354 | + coros.append(self._get_file(lpath, rpath, **kwargs)) |
| 355 | + return await _run_coros_in_chunks( |
| 356 | + coros, batch_size=batch_size, callback=callback |
350 | 357 | )
|
351 | 358 |
|
352 | 359 | async def _get_file(self, rpath, lpath, **kwargs):
|
@@ -374,13 +381,16 @@ async def _get(self, rpath, lpath, recursive=False, **kwargs):
|
374 | 381 | rpaths = await self._expand_path(rpath, recursive=recursive)
|
375 | 382 | lpaths = other_paths(rpaths, lpath)
|
376 | 383 | [os.makedirs(os.path.dirname(lp), exist_ok=True) for lp in lpaths]
|
| 384 | + callback = as_callback(kwargs.pop("callback", None)) |
377 | 385 | batch_size = kwargs.pop("batch_size", self.batch_size)
|
378 |
| - return await _throttled_gather( |
379 |
| - [ |
380 |
| - self._get_file(rpath, lpath, **kwargs) |
381 |
| - for lpath, rpath in zip(lpaths, rpaths) |
382 |
| - ], |
383 |
| - batch_size=batch_size, |
| 386 | + |
| 387 | + coros = [] |
| 388 | + callback.lazy_call("set_size", len, lpaths) |
| 389 | + for lpath, rpath in zip(lpaths, rpaths): |
| 390 | + branch(callback, rpath, lpath, kwargs) |
| 391 | + coros.append(self._get_file(rpath, lpath, **kwargs)) |
| 392 | + return await _run_coros_in_chunks( |
| 393 | + coros, batch_size=batch_size, callback=callback |
384 | 394 | )
|
385 | 395 |
|
386 | 396 | async def _isfile(self, path):
|
|
0 commit comments