Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

executable file 72 lines (53 sloc) 1.725 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
#!/usr/bin/env python

"""
This is a little benchmarking script to exercise bagit.make_bag and
bagit.Bag.validate using 1-8 parallel processes. The first time it is run,
it downloads some images from NASA to use as bagging input.
"""

import os
import ftplib
import shutil
import timeit

# fetch some images from NASA to bag up

# Populate bench-data/ once; later runs reuse whatever is already on disk.
if not os.path.isdir('bench-data'):
    print("fetching some images to bag up from nasa")
    os.mkdir('bench-data')
    ftp = ftplib.FTP('nssdcftp.gsfc.nasa.gov')
    try:
        # Anonymous login, then list the Mars hi-res gallery.
        ftp.login()
        ftp.cwd('/photo_gallery/hi-res/planetary/mars/')
        names = []
        ftp.retrlines('NLST', names.append)

        for name in names:
            print("fetching %s" % name)
            # The with-block closes the local file even if the transfer fails.
            with open(os.path.join('bench-data', name), 'wb') as fh:
                ftp.retrbinary('RETR %s' % name, fh.write)
    finally:
        # Always release the control connection, success or failure.
        ftp.close()


# create bags using 1-8 processes

# Snippet executed by timeit for each repetition. If bench-data is already a
# bag (it has a data/ subdirectory), the previous bagging is undone first:
# the bag* files are removed and the payload is moved back to the top level.
# The %s placeholder is filled with the number of processes to use.
statement = """
import os
import bagit

if os.path.isdir('bench-data/data'):
    os.system("rm bench-data/bag*")
    os.system("mv bench-data/data/* bench-data/")
    os.system("rmdir bench-data/data")

bagit.make_bag('bench-data', processes=%s)
"""

for p in range(1, 9):
    t = timeit.Timer(statement % p)
    # Reported figure is the total wall time of the 10 repetitions.
    elapsed = t.timeit(number=10)
    print("create w/ %s processes: %.2f seconds " % (p, elapsed))


# validate a bag with 1-8 processes

import bagit

# Snippet executed by timeit for each repetition: open the scratch bag and
# validate it. The %s placeholder is filled with the number of processes.
statement = """
import os
import bagit

bag = bagit.Bag('bench-data-bag')
bag.validate(processes=%s)
"""

# Validation runs against a scratch copy of bench-data. copytree refuses to
# write into an existing directory, so clear anything left behind by an
# interrupted earlier run before copying.
if os.path.isdir('bench-data-bag'):
    shutil.rmtree('bench-data-bag')
shutil.copytree('bench-data', 'bench-data-bag')

try:
    bagit.make_bag('bench-data-bag')

    # try 1-8 parallel processes
    for p in range(1, 9):
        t = timeit.Timer(statement % p)
        # Reported figure is the total wall time of the 10 repetitions.
        elapsed = t.timeit(number=10)
        print("validate w/ %s processes: %.2f seconds " % (p, elapsed))
finally:
    # Remove the scratch copy even if bagging or validation raised.
    shutil.rmtree('bench-data-bag')
Something went wrong with that request. Please try again.