In [0]:
import requests
import json
import time
import math
from datetime import datetime, timedelta

from pprint import PrettyPrinter
pp = PrettyPrinter(indent=4)

In [0]:
# Endpoint that paginate() POSTs every search request to.
url = 'https://adams.nrc.gov/wba/services/search/simple'

# Request headers copied from a Firefox browser session (see the User-Agent);
# they make the POST look like the XHR issued by the ADAMS web UI.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/112.0',
    'Accept': '*/*',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate, br',
    'X-Requested-With': 'XMLHttpRequest',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Origin': 'https://adams.nrc.gov',
    'DNT': '1',
    'Connection': 'keep-alive',
    'Referer': 'https://adams.nrc.gov/wba/',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin'
}

# Base form payload for the search service. offset_data() replaces the
# 'query' entry with a date-windowed query before each request.
data = {
    'start': '0',
    # Plain string: the query has no placeholders, so no f-string is needed.
    'query': 'SELECT DocumentTitle, AccessionNumber, DocumentDate, PublishDatePARS FROM Document WHERE DocumentType IS NOT NULL',
    'initial': 'true',
    # Optional sort parameters, currently disabled:
    # 'sort': '$size',
    # 'dir': 'ASC',
    'fieldSet': 'pars',
    'formatSet': 'default',
    # NOTE(review): presumably the maximum number of documents returned per
    # request -- confirm against the service; larger result sets may be cut off.
    'limit': '1000',
    'queryParams': '{}',
    'dataProviderId': 'ce_bp8os_repository',
    'operationId': 'advanced_search',
    'filters': '{"operation": "and", "operand": []}'
}

# Cookie payload handed to requests.post() via paginate()'s `cookies` argument.
# NOTE(review): the value embeds hard-coded browser session cookies
# (bm_sv, JSESSIONID, ak_bmsc). These presumably expire and should not be
# committed; consider loading them from the environment instead.
# NOTE(review): the whole Cookie header string is stored under a single key
# named 'Cookie', so it is presumably sent as one cookie called "Cookie"
# rather than as separate cookies -- confirm this is what the server expects.
cookies = {
    'Cookie': 'ys-folder-view-tab-tree-holder=o%3Acollapsed%3Db%253A0; bm_sv=0688099B00A6C66EACAC987B6BECA3DB~YAAQJq08F80Eb6eHAQAAEa9/qhMYgwTLfMaIDwcEFTIx4oFAlzHxGMacl7RcyVWna13y2qwSkawES/g9QLYNg13ZG17235pk99E4/SpkT92HJITTONfv+iVheVmPCATs0CSmKdWxweMtAmLYp3HWighSc8+FbcibzRoBcJDtF0rOf+purNdKCdmwUhSWct6YFfNvI7IHUYy4PJ7K0OJJ658pERKUYQ5d9dU6qCmh82f2nEHTRBnQiYDuDns1Hw==~1; JSESSIONID=0000Ks0c6ZOMioQ-plvl9X9_qej:-1; ak_bmsc=27622EFEB2E5ABE08309E4F14A5C3044~000000000000000000000000000000~YAAQJq08F+cDb6eHAQAAPaF/qhNosQufgDyaivBTTjVG9LNi0tJjyXmEDxvXol+WdzmFwimlf6QaenPkAOGuIMyJj8RqGzdzY4MT/iMGtMFvdgv4q7LMb8LeA0EJmtN7CTZWGEXUFr2kEiRXMGt1MLqRoRYyB6JU5XBfctoD43tHH/khaSYY+tmNpPUYqEstYlLwjg3aqx0aJD3h0PN8elSv3dV2GpQMGJiL/gvecaQ/fPHuUD4g/qGKMmD2QKjRiE8o3reeZnRgKLaUmJAj2eROXtnbu/PfH/xWyG2jlfgKmknDH6FM/KdTBARFmL1U4M/eWsF+QD8gLE7LOEbtb7aUjllAz+MGWPM7KKFGJ69IehfcS1lRoCoQjR+LNw=='
}

def offset_data(data, start, end):
    """Return a copy of ``data`` whose query is restricted to a publish-date window.

    The caller's ``data`` mapping is left untouched, so a shared base payload
    can be reused safely across calls.

    Args:
        data: Base form payload; must provide ``copy()`` (e.g. a dict).
        start: ``datetime`` marking the exclusive lower bound of PublishDatePARS.
        end: ``datetime`` marking the exclusive upper bound of PublishDatePARS.

    Returns:
        A shallow copy of ``data`` with its ``'query'`` entry replaced by the
        windowed query.

    Side effects:
        Prints the two formatted timestamps.
    """
    from datetime import timezone  # local import: not among the file-level imports

    def _stamp(moment):
        # The literal 'Z' suffix claims UTC, so timezone-aware values are
        # converted to UTC first. Naive values are formatted as-is, which is
        # what the previous '%Z' (empty for naive datetimes) produced.
        if moment.tzinfo is not None and moment.utcoffset() is not None:
            moment = moment.astimezone(timezone.utc)
        return moment.strftime('%Y%m%dT%H%M%S') + 'Z'

    start = _stamp(start)
    end = _stamp(end)
    print(start, end)

    # Work on a copy so the caller's payload keeps its original query.
    windowed = data.copy()
    windowed['query'] = f'SELECT * FROM Document WHERE (PublishDatePARS > {start}) AND (PublishDatePARS < {end}) AND DocumentType IS NOT NULL'

    return windowed

def paginate(start, end, slide, headers, data, cookies):
    """Collect search results between two dates, one ``slide``-day window at a time.

    For every window the base payload is copied, given a date-restricted query
    by ``offset_data``, and POSTed to the module-level ``url``. The ``docs``
    list of each JSON response is appended to the overall result.

    Args:
        start: Beginning of the range as a ``'YYYY-MM-DD'`` string.
        end: End of the range as a ``'YYYY-MM-DD'`` string.
        slide: Width of each window in days; must be positive.
        headers: HTTP headers passed to ``requests.post``.
        data: Base form payload. It is copied for each request and is never
            modified here.
        cookies: Cookies passed to ``requests.post``.

    Returns:
        list: all entries of the ``docs`` field of every response, in window
        order.

    Raises:
        ValueError: if ``slide`` is not positive, or if ``start``/``end`` do
            not match ``'%Y-%m-%d'``.
        Exception: whatever ``json.loads`` raises for a non-JSON response body
            (the body is printed before re-raising).
        KeyError: if a response has no ``'docs'`` field.
    """
    if slide <= 0:
        raise ValueError(f'slide must be a positive number of days, got {slide!r}')

    start = datetime.strptime(start, '%Y-%m-%d')
    end = datetime.strptime(end, '%Y-%m-%d')

    step = timedelta(days=slide)
    time_difference = end.date() - start.date()
    windows = math.ceil(time_difference.days / slide)

    full_json = []
    rel_start = start

    for _ in range(windows):
        # Clamp the final window so it never reaches past the requested end
        # date when the range is not an exact multiple of `slide`.
        rel_end = min(rel_start + step, end)

        # Pass a copy: the caller's payload must keep its original query even
        # if offset_data edits the mapping it receives.
        mod_data = offset_data(data.copy(), rel_start, rel_end)
        response = requests.post(url, headers=headers, data=mod_data, cookies=cookies)
        try:
            resp_json = json.loads(response.text)
        except Exception:
            # Show the raw body to help diagnose non-JSON replies, then
            # re-raise with the original traceback intact.
            print(response.text)
            raise

        # NOTE(review): each request presumably returns at most data['limit']
        # documents; a window with more matches may be truncated -- consider
        # comparing len(docs) with the response's 'total' field.
        docs = resp_json['docs']

        if len(docs) > 0:
            print(f"Appended {len(docs)} to full!")
            full_json.extend(docs)
        else:
            # An empty window is reported together with the full response.
            print('Error', resp_json)

        # The next window starts where this one ended.
        rel_start = rel_end

    return full_json

In [0]:
# Fetch everything published between 1 and 5 April 2023, two days per request.
docs_json = paginate(
    start='2023-04-01',
    end='2023-04-05',
    slide=2,
    headers=headers,
    data=data,
    cookies=cookies,
)

20230401T000000Z 20230403T000000Z
Error {'docs': [], 'total': 0, 'messages': ['No results found.']}
20230403T000000Z 20230405T000000Z
Appended 314 to full!


In [0]:
# Number of documents collected by paginate() across all windows.
len(docs_json)

Out[18]: 314

In [0]:
# Save the collected documents as JSON in docs.json (working directory).
with open('docs.json', 'w') as out_file:
    json.dump(docs_json, out_file)

In [0]:
# Pull the "title" field out of every fetched document, keeping fetch order.
doc_titles = [entry["title"] for entry in docs_json]

In [0]:
# Pretty-print the titles in sorted order.
sorted_titles = sorted(doc_titles)
pp.pprint(sorted_titles)

[   '03-27-23 - Letter to Honorable Cathy McMorris Rodgers, et al., from Chair '
    'Hanson re: Submits the February 2023 Monthly Status Report on the U.S. '
    'NRC Activities and Expenditures of Unobligated Carryover Funds '
    'Appropriated from the Nuclear Waste Fund',
    '04/05/2023 Presubmittal Meeting with Arizona Public Service Company '
    'Regarding Planned Containment Temperature and Safety Injection Tank '
    'Volume License Amendment Request(s) at Palo Verde Nuclear Generating '
    'Station, Units 1, 2, and 3 (EPID L-2023-LRM-0016)',
    '04/12/2023 Information Meeting with Federally-Recognized Tribes Regarding '
    'the Proposed Rule for Renewing Nuclear Power Plant Operating Licenses  '
    'Environmental Review',
    '04/13/2023 Meeting with NextEra Energy/Florida Power & Light Company to '
    'Discuss the St. Lucie Plant, Units 1 and 2 and Turkey Point Nuclear '
    'Generating, Units 3 and 4 Improved Technical Specifications License '
    'Amendment Requests.