Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

bugfix to look for Payload-Oxum in the right place; also added fast=True to validate command to only do oxum validation instead of recalculating fixities
  • Loading branch information...
commit 677e8ba52b0c670bbec4175d74207d1bc6ef7439 1 parent 0edb181
@edsu authored
Showing with 51 additions and 34 deletions.
  1. +1 −0  .gitignore
  2. +19 −14 bagit.py
  3. +31 −20 test.py
View
1  .gitignore
@@ -3,3 +3,4 @@ bench-data
build
dist
MANIFEST
+bagit.egg-info
View
33 bagit.py
@@ -131,6 +131,7 @@ class Bag(object):
def __init__(self, path=None):
super(Bag, self).__init__()
self.tags = {}
+ self.info = {}
self.entries = {}
self.algs = []
self.tag_file_name = None
@@ -236,13 +237,18 @@ def urls_to_be_fetched(self):
yield url
def has_oxum(self):
- return self.tags.has_key('Payload-Oxum')
-
- def validate(self):
- """Checks the structure and contents are valid
+ return self.info.has_key('Payload-Oxum')
+
+ def validate(self, fast=False):
+ """Checks the structure and contents are valid. If you supply
+ the parameter fast=True the Payload-Oxum (if present) will
+ be used to check that the payload files are present and
+ accounted for, instead of re-calculating fixities and
+ comparing them against the manifest. By default validate()
+ will re-calculate fixities (fast=False).
"""
self._validate_structure()
- self._validate_contents()
+ self._validate_contents(fast=fast)
return True
def _load_manifests(self):
@@ -254,7 +260,6 @@ def _load_manifests(self):
try:
for line in manifest_file:
- print line
line = line.strip()
# Ignore blank lines and comments.
@@ -326,16 +331,15 @@ def _validate_structure_tag_files(self):
def _validate_structure_is_valid_tag_file_name(self, file_name):
return file_name == self.tag_file_name
- def _validate_contents(self):
- """
- Validate the contents of this bag, which can be a very time-consuming
- operation
- """
+ def _validate_contents(self, fast=False):
+ if fast and not self.has_oxum():
+ raise BagValidationError("cannot validate Bag with fast=True if Bag lacks a Payload-Oxum")
self._validate_oxum() # Fast
- self._validate_entries() # *SLOW*
+ if not fast:
+ self._validate_entries() # *SLOW*
def _validate_oxum(self):
- oxum = self.tags.get('Payload-Oxum')
+ oxum = self.info.get('Payload-Oxum')
if oxum == None: return
byte_count, file_count = oxum.split('.', 1)
@@ -354,7 +358,7 @@ def _validate_oxum(self):
total_files += 1
if file_count != total_files or byte_count != total_bytes:
- raise BagError("Oxum error. Found %s files and %s bytes on disk; expected %s files and %s bytes." % (total_files, total_bytes, file_count, byte_count))
+ raise BagValidationError("Oxum error. Found %s files and %s bytes on disk; expected %s files and %s bytes." % (total_files, total_bytes, file_count, byte_count))
def _validate_entries(self):
"""
@@ -520,6 +524,7 @@ def _make_opt_parser():
help='parallelize checksums generation')
parser.add_option('--log', action='store', dest='log')
parser.add_option('--quiet', action='store_true', dest='quiet')
+ parser.add_option('--validate', action='store-true', dest='validate')
for header in _bag_info_headers:
parser.add_option('--%s' % header.lower(), type="string",
View
51 test.py
@@ -1,5 +1,4 @@
import os
-import sys
import shutil
import datetime
import unittest
@@ -17,7 +16,7 @@ def tearDown(self):
if os.path.isdir('test-data-tmp'):
shutil.rmtree('test-data-tmp')
- def atest_make_bag(self):
+ def test_make_bag(self):
info = {'Contact-Email': 'ehs@pobox.com'}
bag = bagit.make_bag('test-data-tmp', bag_info=info)
@@ -47,7 +46,7 @@ def atest_make_bag(self):
self.assertTrue('Bagging-Date: %s' % today in bag_info_txt)
self.assertTrue('Payload-Oxum: 991765.5' in bag_info_txt)
- def atest_bag_class(self):
+ def test_bag_class(self):
info = {'Contact-Email': 'ehs@pobox.com'}
bag = bagit.make_bag('test-data-tmp', bag_info=info)
self.assertTrue(isinstance(bag, bagit.Bag))
@@ -58,36 +57,48 @@ def atest_bag_class(self):
'data/loc/2478433644_2839c5e8b8_o_d.jpg',
'data/loc/3314493806_6f1db86d66_o_d.jpg']))
self.assertEqual(list(bag.manifest_files()), ['test-data-tmp/manifest-md5.txt'])
- self.assertEqual(bag.validate(), True)
- def atest_bag_constructor(self):
+ def test_has_oxum(self):
+ bag = bagit.make_bag('test-data-tmp')
+ self.assertTrue(bag.has_oxum())
+
+ def test_bag_constructor(self):
bag = bagit.make_bag('test-data-tmp')
bag = bagit.Bag('test-data-tmp')
self.assertEqual(type(bag), bagit.Bag)
self.assertEqual(len(list(bag.payload_files())), 5)
def test_bag_url(self):
- bag = bagit.Bag('http://sun9.loc.gov/ingest-internal/copyright/AT19281977ACE-ACZ/')
- self.assertEqual(len(list(bag.payload_files())), 20)
+ bag = bagit.Bag('http://chroniclingamerica.loc.gov/data/dlc/batch_dlc_jamaica_ver01/')
+ self.assertEqual(len(bag.entries), 19396)
-class TestValidation(unittest.TestCase):
+ def test_validate(self):
+ bag = bagit.make_bag('test-data-tmp')
+ self.assertEqual(bag.validate(), True)
+ os.remove(os.path.join("test-data-tmp", "data", "loc",
+ "2478433644_2839c5e8b8_o_d.jpg"))
+ self.assertRaises(bagit.BagValidationError, bag.validate)
- def setUp(self):
- if os.path.isdir('test-data-tmp'):
- shutil.rmtree('test-data-tmp')
- shutil.copytree('test-data', 'test-data-tmp')
- self.bag = bagit.make_bag('test-data-tmp')
+ def test_validate_fast(self):
+ bag = bagit.make_bag('test-data-tmp')
+ self.assertEqual(bag.validate(fast=True), True)
+ os.remove(os.path.join("test-data-tmp", "data", "loc",
+ "2478433644_2839c5e8b8_o_d.jpg"))
+ self.assertRaises(bagit.BagValidationError, bag.validate, fast=True)
- def tearDown(self):
- if os.path.isdir('test-data-tmp'):
- shutil.rmtree('test-data-tmp')
- self.bag = None
+ def test_validate_fast_without_oxum(self):
+ bag = bagit.make_bag('test-data-tmp')
+ os.remove(os.path.join("test-data-tmp", "bag-info.txt"))
+ bag = bagit.Bag('test-data-tmp')
+ self.assertRaises(bagit.BagValidationError, bag.validate, fast=True)
- def atest_missing_file(self):
+ def test_missing_file(self):
+ bag = bagit.make_bag('test-data-tmp')
os.remove('test-data-tmp/data/loc/3314493806_6f1db86d66_o_d.jpg')
- self.assertRaises(bagit.BagValidationError, self.bag.validate)
+ self.assertRaises(bagit.BagValidationError, bag.validate)
- def atest_different_file(self):
+ def test_different_file(self):
+ bag = bagit.make_bag('test-data-tmp')
self.assertTrue(os.path.isfile('test-data-tmp/data/loc/3314493806_6f1db86d66_o_d.jpg'))
fh = open('test-data-tmp/data/loc/3314493806_6f1db86d66_o_d.jpg', 'w')
fh.write('all your file are belong to us')
Please sign in to comment.
Something went wrong with that request. Please try again.