Permalink
Browse files

Stop requesting MWDL records when TOTALHITS is met

- Also lowered bulk_size from 500 to 100 in profiles/mwdl.pjs
  • Loading branch information...
DPLA Camps User
DPLA Camps User committed Mar 19, 2013
1 parent e1e0d96 commit 3ad5feb16a0e2f742d059e8c6edb37201d09642c
Showing with 7 additions and 6 deletions.
  1. +1 −1 profiles/mwdl.pjs
  2. +6 −5 scripts/poll_profiles
View
@@ -1,5 +1,5 @@
{
- "bulk_size": "500",
+ "bulk_size": "100",
"enrichments_coll": [],
"name": "mwdl",
"enrichments_rec": [
View
@@ -74,7 +74,7 @@ def process_profile(uri_base, profile_f):
def process_primo_all(profile, blacklist=None):
# TODO flag to stop requesting
request_more = True
- index = 1
+ index = 0
while request_more:
collection = {}
collection['id'] = 1
@@ -87,17 +87,18 @@ def process_primo_all(profile, blacklist=None):
print >> sys.stderr, ' HTTP error (%s) resolving URL: %s' % (resp[u'status'], endpoint)
request_more = False
- print >> sys.stderr, "Index: %s" % index
-
endpoint_content = ARC_PARSE(content)
+ total_hits = endpoint_content['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['TOTALHITS']
+ print >> sys.stderr, "%s of %s total documents" % (index, total_hits)
items = endpoint_content['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['DOC']
+
for item in (items if isinstance(items, list) else [items]):
item['_id'] = item['PrimoNMBib']['record']['control']['recordid']
collection['items'].append(item)
+ index += 1
enrich_coll(profile, collection['name'], json.dumps({'items':collection['items']}))
- index += int(profile[u'bulk_size'])
- if index > 5000:
+ if int(index) == int(total_hits):
request_more = False
return True

0 comments on commit 3ad5feb

Please sign in to comment.