Skip to content

Commit

Permalink
Added some simple instrumentation to HTTP fetch: elapsed time (per file and overall), MB/sec transfer rate.
Browse files Browse the repository at this point in the history

Increased requests iter_content chunk size to 10MB.
  • Loading branch information
mikedarcy committed Feb 3, 2017
1 parent 41932e8 commit 89dbac4
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 5 deletions.
4 changes: 4 additions & 0 deletions bdbag/fetch/fetcher.py
@@ -1,4 +1,5 @@
import sys
import datetime
import logging
from bdbag.fetch.transports import *
from bdbag.fetch.auth.keychain import *
Expand Down Expand Up @@ -27,6 +28,7 @@ def fetch_bag_files(bag, keychain_file, force=False, callback=None):
auth = read_keychain(keychain_file)
current = 0
total = 0 if not callback else len(set(bag.files_to_be_fetched()))
start = datetime.datetime.now()
for url, size, path in bag.fetch_entries():
output_path = os.path.normpath(os.path.join(bag.path, path))
if not force and os.path.exists(output_path) and os.path.getsize(output_path) == int(size):
Expand All @@ -40,6 +42,8 @@ def fetch_bag_files(bag, keychain_file, force=False, callback=None):
if not callback(current, total):
logger.warn("Fetch cancelled by user...")
break
elapsed = datetime.datetime.now() - start
logger.info("Fetch complete. Elapsed time: %s" % elapsed)
cleanup_transports()
return success

Expand Down
16 changes: 12 additions & 4 deletions bdbag/fetch/transports/fetch_http.py
@@ -1,5 +1,6 @@
import os
import sys
import datetime
import logging
import requests
from requests.adapters import HTTPAdapter
Expand All @@ -15,7 +16,7 @@

logger = logging.getLogger(__name__)

CHUNK_SIZE = 1024 * 1024
CHUNK_SIZE = 1024 * 10240
SESSIONS = dict()
HEADERS = {'Connection': 'keep-alive'}

Expand Down Expand Up @@ -128,12 +129,19 @@ def get_file(url, output_path, auth_config, headers=None, session=None):
logger.error("Host %s responded:\n\n%s" % (urlsplit(url).netloc, r.text))
logger.warn('File transfer failed: [%s]' % output_path)
else:
total = 0
start = datetime.datetime.now()
logger.debug("Transferring file %s to %s" % (url, output_path))
with open(output_path, 'wb') as data_file:
for chunk in r.iter_content(CHUNK_SIZE):
for chunk in r.iter_content(chunk_size=CHUNK_SIZE):
data_file.write(chunk)
data_file.flush()
logger.info('File transfer successful: [%s]' % output_path)
total += len(chunk)
elapsed = datetime.datetime.now() - start
totalSecs = elapsed.total_seconds()
totalMBs = total / (1024 * 1024)
throughput = str("%.3f MB/second" % (totalMBs / totalSecs if totalSecs > 0 else 0.001))
logger.info('File [%s] transfer successful. %.3f MB transferred at %s. Elapsed time: %s. ' %
(output_path, totalMBs, throughput, elapsed))
return True

except requests.exceptions.RequestException as e:
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Expand Up @@ -14,7 +14,7 @@
url='https://github.com/ini-bdds/bdbag/',
maintainer='USC Information Sciences Institute ISR Division',
maintainer_email='misd-support@isi.edu',
version="0.9.9",
version="1.0.0",
packages=find_packages(),
package_data={'bdbag': ['profiles/*.*']},
test_suite='test',
Expand Down

0 comments on commit 89dbac4

Please sign in to comment.