Skip to content
Browse files

initial code for bagging utility

  • Loading branch information...
0 parents commit 97a7675250c8e3cece2401258323be548ab0dec3 Ed Summers committed Feb 9, 2010
103 bag.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python
+
+"""
+A command line tool for creating BagIt bags on Unix systems with md5deep
+installed.
+
+Basic usage is to give bag a directory to bag up:
+
+ % bag my_directory
+
+You can bag multiple directories if you wish:
+
+ % bag directory1 directory2
+
+Optionally you can pass metadata intended for the bag-info.txt:
+
+ % bag --source-organization "Library of Congress" directory
+
+For more help see:
+
+ % bag --help
+"""
+
+import os
+import logging
+
+from datetime import date
+from optparse import OptionParser
+
+# command line options will be created for these bag-info.txt headers
+
# Each name below becomes a lower-cased ``--<header>`` command line option
# (see make_opt_parser) and is written verbatim as a bag-info.txt header.
bag_info_headers = [
    'Source-Organization',
    'Organization-Address',
    'Contact-Name',
    'Contact-Phone',
    'Contact-Email',
    'External-Description',
    'External-Identifier',
    'Bag-Size',
    'Bag-Group-Identifier',
    'Bag-Count',
    'Internal-Sender-Identifier',
    'Internal-Sender-Description',
    # Bagging-Date is not listed: make_bag() always fills it in itself.
    # Payload-Oxum is meant to be autogenerated as well.
    # NOTE(review): make_bag() does not write Payload-Oxum yet.
]
+
def make_bag(bag_dir, bag_info=None):
    """Turn the directory *bag_dir* into a BagIt bag, in place.

    The non-hidden entries of *bag_dir* are moved into a ``data``
    subdirectory, after which ``manifest-md5.txt`` (produced by the external
    ``md5deep`` tool), ``bagit.txt`` and ``bag-info.txt`` are written next
    to it.

    *bag_info* is an optional dict mapping bag-info.txt header names to
    values. It is not modified. A ``Bagging-Date`` header holding today's
    date is always written, replacing any value supplied for that key.

    Raises RuntimeError when *bag_dir* is not an existing directory. Errors
    raised while the bag is being built are logged and not re-raised; the
    previous working directory is restored in either case.
    """
    if not os.path.isdir(bag_dir):
        raise RuntimeError("no such bag directory %s" % bag_dir)

    # Work on a copy so the caller's dict is never modified.
    info = dict(bag_info) if bag_info is not None else {}
    info['Bagging-Date'] = date.today().strftime("%Y-%m-%d")

    old_dir = os.path.abspath(os.path.curdir)
    os.chdir(bag_dir)

    try:
        logging.info("creating data dir")
        if not os.path.isdir('data'):
            os.mkdir('data')
        for name in sorted(os.listdir(os.path.curdir)):
            # Hidden entries stay where they are, as they did with the
            # shell glob ``mv * data`` this loop replaces.
            if name == 'data' or name.startswith('.'):
                continue
            os.rename(name, os.path.join('data', name))

        logging.info("writing manifest-md5.txt")
        if os.system('md5deep -rl data > manifest-md5.txt') != 0:
            # Keep going so the remaining tag files are still written, but
            # do not let a missing or failing md5deep pass unnoticed.
            logging.error("md5deep failed; manifest-md5.txt may be incomplete")

        logging.info("writing bagit.txt")
        txt = """BagIt-Version: 0.96\nTag-File-Character-Encoding: UTF-8"""
        with open("bagit.txt", "w") as bagit_txt:
            bagit_txt.write(txt)

        logging.info("writing bag-info.txt")
        with open("bag-info.txt", "w") as bag_info_txt:
            # Sorted so the header order is stable from run to run.
            for h in sorted(info):
                bag_info_txt.write("%s: %s\n" % (h, info[h]))

    except Exception as e:
        logging.error(e)

    finally:
        os.chdir(old_dir)
+
def bag_info_store(option, opt, value, parser):
    """optparse callback that records one bag-info.txt header on the parser.

    The option string *opt* (e.g. ``--source-organization``) is converted to
    its header form (``Source-Organization``) and *value* is stored under
    that key in ``parser.bag_info``, which is created on first use.
    *option* is accepted because the callback signature requires it; it is
    not used.
    """
    # str.lstrip() takes a set of characters, not a prefix: a single '-'
    # is enough to remove every leading dash.
    name = opt.lstrip('-')
    header = '-'.join(part.capitalize() for part in name.split('-'))
    if not hasattr(parser, 'bag_info'):
        parser.bag_info = {}
    parser.bag_info[header] = value
+
def make_opt_parser():
    """Build the command line parser for the bag tool.

    One ``--<header>`` string option (the header name lower-cased) is added
    for every entry in bag_info_headers. Values given on the command line
    are collected into the ``parser.bag_info`` dict by bag_info_store.

    Returns the configured OptionParser.
    """
    parser = OptionParser(usage='usage: %prog [options] dir1 dir2 ...')
    # Start with an empty dict so parser.bag_info exists even when no
    # metadata option is passed on the command line.
    parser.bag_info = {}
    for header in bag_info_headers:
        parser.add_option('--%s' % header.lower(), type="string",
                          action='callback', callback=bag_info_store,
                          help='%s header for bag-info.txt' % header)
    return parser
+
if __name__ == '__main__':
    opt_parser = make_opt_parser()
    opts, args = opt_parser.parse_args()
    # parser.bag_info may not exist when no metadata option was parsed, so
    # fall back to None (make_bag treats that as "no extra headers").
    bag_info = getattr(opt_parser, 'bag_info', None)
    for bag_dir in args:
        make_bag(bag_dir, bag_info=bag_info)
7 test-data/README
@@ -0,0 +1,7 @@
+public domain images obtained from flickr commons:
+
+http://www.flickr.com/photos/smithsonian/2584174182/
+http://www.flickr.com/photos/smithsonian/4011399822/
+http://www.flickr.com/photos/library_of_congress/2478433644/
+
+
BIN test-data/loc/2478433644_2839c5e8b8_o_d.jpg
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
BIN test-data/loc/3314493806_6f1db86d66_o_d.jpg
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
BIN test-data/si/2584174182_ffd5c24905_b_d.jpg
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
BIN test-data/si/4011399822_65987a4806_b_d.jpg
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
23 test.py
@@ -0,0 +1,23 @@
+import os
+import sys
+import shutil
+import unittest
+
+import bag
+
class TestBag(unittest.TestCase):
    """Run bag.make_bag against a throwaway copy of the test-data directory.

    Paths are relative, so the tests are expected to be started from the
    directory that contains test-data.
    """

    def setUp(self):
        # Always start from a fresh copy; a previous run may have left one.
        if os.path.isdir('test-data-tmp'):
            shutil.rmtree('test-data-tmp')
        shutil.copytree('test-data', 'test-data-tmp')

    def test_make_bag(self):
        bag.make_bag('test-data-tmp')

        # make_bag logs errors instead of raising them, so the outcome has
        # to be checked on disk for this test to be able to fail.
        self.assertTrue(os.path.isdir(os.path.join('test-data-tmp', 'data')))
        self.assertTrue(
            os.path.isfile(os.path.join('test-data-tmp', 'data', 'README')))
        self.assertTrue(
            os.path.isdir(os.path.join('test-data-tmp', 'data', 'loc')))
        self.assertTrue(
            os.path.isdir(os.path.join('test-data-tmp', 'data', 'si')))
        self.assertTrue(
            os.path.isfile(os.path.join('test-data-tmp', 'manifest-md5.txt')))
        self.assertTrue(
            os.path.isfile(os.path.join('test-data-tmp', 'bagit.txt')))

        with open(os.path.join('test-data-tmp', 'bag-info.txt')) as f:
            bag_info_txt = f.read()
        self.assertTrue('Bagging-Date: ' in bag_info_txt)

    def tearDown(self):
        if os.path.isdir('test-data-tmp'):
            shutil.rmtree('test-data-tmp')
+
# Allow the tests to be run directly with ``python test.py``.
if __name__ == '__main__':
    unittest.main()

0 comments on commit 97a7675

Please sign in to comment.
Something went wrong with that request. Please try again.