Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Stop requesting MWDL records once TOTALHITS is reached

- Also lowered bulk_size from 500 to 100 in profiles/mwdl.pjs
  • Loading branch information...
commit 3ad5feb16a0e2f742d059e8c6edb37201d09642c 1 parent e1e0d96
DPLA Camps User authored
Showing with 7 additions and 6 deletions.
  1. +1 −1  profiles/mwdl.pjs
  2. +6 −5 scripts/poll_profiles
View
2  profiles/mwdl.pjs
@@ -1,5 +1,5 @@
{
- "bulk_size": "500",
+ "bulk_size": "100",
"enrichments_coll": [],
"name": "mwdl",
"enrichments_rec": [
View
11 scripts/poll_profiles
@@ -74,7 +74,7 @@ def process_profile(uri_base, profile_f):
def process_primo_all(profile, blacklist=None):
# TODO flag to stop requesting
request_more = True
- index = 1
+ index = 0
while request_more:
collection = {}
collection['id'] = 1
@@ -87,17 +87,18 @@ def process_primo_all(profile, blacklist=None):
print >> sys.stderr, ' HTTP error (%s) resolving URL: %s' % (resp[u'status'], endpoint)
request_more = False
- print >> sys.stderr, "Index: %s" % index
-
endpoint_content = ARC_PARSE(content)
+ total_hits = endpoint_content['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['TOTALHITS']
+ print >> sys.stderr, "%s of %s total documents" % (index, total_hits)
items = endpoint_content['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['DOC']
+
for item in (items if isinstance(items, list) else [items]):
item['_id'] = item['PrimoNMBib']['record']['control']['recordid']
collection['items'].append(item)
+ index += 1
enrich_coll(profile, collection['name'], json.dumps({'items':collection['items']}))
- index += int(profile[u'bulk_size'])
- if index > 5000:
+ if int(index) == int(total_hits):
request_more = False
return True
Please sign in to comment.
Something went wrong with that request. Please try again.