In [6]:
import base64
from functools import wraps
from itertools import chain, islice
import json
import os
import pandas as pd
import requests

def legiscan_api(api_action):
    """Decorator that supplies a LegiScan API key to ``api_action``.

    The wrapped function is always invoked with an ``api_key`` keyword
    argument. A truthy ``api_key`` passed by the caller is forwarded untouched;
    otherwise the value of the ``LEGISCAN_API_KEY`` environment variable is
    injected (replacing a missing or falsy ``api_key``).

    The environment variable is looked up on every call rather than once at
    decoration time, so exporting the key after this cell has run (for
    example with ``%env``) takes effect without re-defining the decorated
    functions. When the variable is unset at call time, the value captured at
    decoration time is used, which is ``None`` if it was never set.

    Args:
        api_action: Callable that accepts an ``api_key`` keyword argument.

    Returns:
        A wrapper carrying the name and docstring of ``api_action``.
    """
    # Snapshot kept purely as a fallback for the case where the variable was
    # present at definition time and removed afterwards.
    key_at_definition = os.environ.get("LEGISCAN_API_KEY")

    @wraps(api_action)
    def api_wrapper(*args, **kwargs):
        if kwargs.get('api_key'):
            return api_action(*args, **kwargs)

        current_key = os.environ.get("LEGISCAN_API_KEY", key_at_definition)
        return api_action(*args, **{**kwargs, 'api_key': current_key})

    return api_wrapper

def get_bill_contents_filename(row, extension):
    """Build the ``bills/`` path for a bill's decoded document.

    ``row`` is indexed by the keys ``'state'`` and ``'billId'``. Every space
    in the bill id is turned into an underscore, and ``extension`` is appended
    after a dot.
    """
    state_code = row['state']
    safe_bill_id = row['billId'].replace(' ', '_')
    return f'bills/{state_code}_{safe_bill_id}.{extension}'

def get_bill_text_response_filename(row):
    """Build the ``tmp/`` path where a bill's raw text response is cached.

    ``row`` is indexed by the keys ``'state'`` and ``'billId'``; every space in
    the bill id is turned into an underscore.
    """
    state_code = row['state']
    safe_bill_id = row['billId'].replace(' ', '_')
    return f'tmp/bill_text_response_{state_code}_{safe_bill_id}.json'

def get_bill_meta_filename(tp):
    """Build the ``tmp/`` path where a bill's metadata JSON is cached.

    Only the first two positions of ``tp`` are read: ``tp[0]`` is used as the
    state and ``tp[1]`` as the bill id. Any further elements are ignored.
    """
    state_code, bill_number = tp[0], tp[1]
    return f'tmp/bill_meta_{state_code}_{bill_number}.json'

@legiscan_api
def get_bill_meta(tp, api_key: str, timeout: float = 30.0):
    """Fetch a bill's metadata from the LegiScan ``getBill`` operation and cache it.

    If the cache file for ``tp`` already exists, no request is made.

    Args:
        tp: Indexable whose positions 0 and 1 (state, bill id) name the cache
            file and whose position 2 is sent as the ``id`` query parameter.
        api_key: LegiScan API key; normally injected by ``legiscan_api``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The path of the cached JSON file, or ``None`` when the request fails,
        the body is not JSON, or the API reports an error status.
    """
    local_filename = get_bill_meta_filename(tp)
    legiscan_bill_id = tp[2]

    if os.path.exists(local_filename):
        return local_filename

    try:
        resp = requests.get(
            'https://api.legiscan.com/',
            params={'key': api_key, 'op': 'getBill', 'id': legiscan_bill_id},
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Report only the exception class: the full message can embed the
        # request URL, which carries the API key.
        print(f'Error {type(exc).__name__} downloading {local_filename}')
        return None

    if not resp.ok:
        print(f'Error {resp.status_code} downloading {local_filename}')
        return None

    try:
        parsed = resp.json()
    except ValueError:
        print(f'Error invalid JSON downloading {local_filename}')
        return None

    if parsed['status'].upper() == 'ERROR':
        print(f'Error {parsed["alert"]["message"]} downloading {local_filename}')
        return None

    # Create the cache directory on demand so a fresh checkout does not lose
    # an already-completed API call to FileNotFoundError.
    cache_dir = os.path.dirname(local_filename)
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)

    with open(local_filename, 'wb') as f:
        f.write(resp.content)

    print(f'got {local_filename}')
    return local_filename

@legiscan_api
def get_bill_text(row, bill_meta_path: str, api_key: str, timeout: float = 30.0):
    """Fetch the newest text of a bill via LegiScan ``getBillText`` and cache it.

    The cached metadata at ``bill_meta_path`` is read to find the entry of
    ``meta['bill']['texts']`` with the greatest ``date``; that entry's
    ``doc_id`` is requested. If the response cache file for ``row`` already
    exists, no request is made.

    Args:
        row: Mapping-like object indexed by ``'state'`` and ``'billId'``.
        bill_meta_path: Path of the cached metadata JSON; a falsy value means
            the metadata is missing.
        api_key: LegiScan API key; normally injected by ``legiscan_api``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The path of the cached response JSON, or ``None`` when the metadata is
        missing, the bill has no texts, the request fails, the body is not
        JSON, or the API reports an error status.
    """
    local_filename = get_bill_text_response_filename(row)

    if not bill_meta_path:
        # get_bill_meta_filename indexes positionally, so hand it a tuple
        # built from the named fields instead of the row itself.
        expected_meta = get_bill_meta_filename((row['state'], row['billId']))
        print(f'Missing meta data {expected_meta}')
        return None

    # Read as bytes so json detects the encoding itself rather than relying on
    # the platform's default text encoding.
    with open(bill_meta_path, 'rb') as f:
        meta = json.load(f)

    texts = meta['bill']['texts']
    if len(texts) < 1:
        print(f'No bill texts available yet for {bill_meta_path}')
        return None

    # max() returns the first entry with the greatest date, which is the same
    # element a stable descending sort would put first.
    latest_text = max(texts, key=lambda text: text['date'])
    doc_id = latest_text['doc_id']

    if os.path.exists(local_filename):
        return local_filename

    try:
        resp = requests.get(
            'https://api.legiscan.com/',
            params={'key': api_key, 'op': 'getBillText', 'id': doc_id},
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Report only the exception class: the full message can embed the
        # request URL, which carries the API key.
        print(f'Error {type(exc).__name__} downloading {local_filename}')
        return None

    if not resp.ok:
        print(f'Error {resp.status_code} downloading {local_filename}')
        return None

    try:
        parsed = resp.json()
    except ValueError:
        print(f'Error invalid JSON downloading {local_filename}')
        return None

    if parsed['status'].upper() == 'ERROR':
        print(f'Error {parsed["alert"]["message"]} downloading {local_filename}')
        return None

    # Create the cache directory on demand so the write cannot fail on a
    # fresh checkout.
    cache_dir = os.path.dirname(local_filename)
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)

    with open(local_filename, 'wb') as f:
        f.write(resp.content)

    print(f'got {local_filename}')
    return local_filename

def extract_bill_contents(row, response_path: str):
    """Decode the base64 document in a cached text response and write it to disk.

    The JSON at ``response_path`` must contain a ``'text'`` object with a
    base64-encoded ``'doc'`` and a ``'mime'`` type; the part of the MIME type
    after the last ``/`` becomes the output file's extension.

    Args:
        row: Mapping-like object indexed by ``'state'`` and ``'billId'``.
        response_path: Path of the cached response JSON; a falsy value means
            the response is missing.

    Returns:
        The path of the written document, or ``None`` when ``response_path``
        is falsy.
    """
    if not response_path:
        print(f'Missing response data {get_bill_text_response_filename(row)}')
        return None

    # Read as bytes so json detects the encoding itself rather than relying on
    # the platform's default text encoding.
    with open(response_path, 'rb') as f:
        result = json.load(f)['text']

    extension = result['mime'].split('/')[-1]
    local_filename = get_bill_contents_filename(row, extension)

    # Decode before opening the target so a malformed payload cannot leave an
    # empty or truncated file behind.
    payload = base64.b64decode(result['doc'])

    output_dir = os.path.dirname(local_filename)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(local_filename, 'wb') as f:
        f.write(payload)

    return local_filename


In [7]:
def retrieve_all(filename: str, limit: int = 50):
    """Fetch metadata for the bills listed in a resolver-map JSON file.

    The file is read as a mapping of state -> object whose ``'bills'`` entry
    maps bill id -> the id passed to ``get_bill_meta`` as the third tuple
    element. Bills are visited in file order.

    Args:
        filename: Path of the resolver-map JSON file.
        limit: Maximum number of bills to process; ``None`` processes all.

    Returns:
        The number of processed bills for which ``get_bill_meta`` returned a
        path (freshly downloaded or already cached).
    """
    # Read as bytes so json detects the encoding itself rather than relying on
    # the platform's default text encoding.
    with open(filename, 'rb') as f:
        mapper = json.load(f)

    def _iter_bills():
        # Flatten {state: {'bills': {bill_id: legiscan_id}}} into tuples.
        for state, state_entry in mapper.items():
            for bill_id, legiscan_id in state_entry['bills'].items():
                yield (state, bill_id, legiscan_id)

    metas = [(bill, get_bill_meta(bill)) for bill in islice(_iter_bills(), limit)]

    # TODO: the get_bill_text / extract_bill_contents stages are disabled.
    # They index their `row` argument by 'state' and 'billId', which the
    # (state, bill_id, legiscan_id) tuples produced here do not support.
    return sum(1 for _, meta_path in metas if meta_path)

# Run the retrieval against the resolver map and display what it returned.
download_count = retrieve_all('resolver_map.json')
download_count

got tmp/bill_meta_AK_SB96.json
got tmp/bill_meta_AK_HB105.json
got tmp/bill_meta_AR_SB294.json
got tmp/bill_meta_AR_HB1615.json
got tmp/bill_meta_AZ_SB1694.json
got tmp/bill_meta_FL_S0266.json
got tmp/bill_meta_FL_H0999.json
got tmp/bill_meta_FL_H1223.json
got tmp/bill_meta_FL_H1069.json
got tmp/bill_meta_FL_H0991.json
got tmp/bill_meta_GA_SB180.json
