In [1]:
import base64
from functools import wraps
from itertools import chain, islice
import json
import os
import pandas as pd
import requests

def legiscan_api(api_action):
    API_KEY = os.environ.get("LEGISCAN_API_KEY")
    
    @wraps(api_action)
    def api_wrapper(*args, **kwargs):
        if 'api_key' in kwargs and kwargs['api_key']:
            return api_action(*args, **kwargs)
        
        return api_action(*args, **(kwargs | {'api_key': API_KEY}))
                          
    return api_wrapper

def get_bill_meta_filename(state, bill_id):
    return f'tmp/bill_meta_{state}_{bill_id}.json'

@legiscan_api
def get_bill_meta(state, bill_id, legiscan_bill_id, api_key: str):
    local_filename = get_bill_meta_filename(state, bill_id)

    if os.path.exists(local_filename):
        #print(f'skipping {local_filename}, exists')
        return local_filename

    assembled_url = f'https://api.legiscan.com/?key={api_key}&op=getBill&id={legiscan_bill_id}'
    resp = requests.get(assembled_url)

    if not resp.ok:
        print(f'Error {resp.status_code} downloading {local_filename}')
        return None
    
    parsed = json.loads(resp.text)
    if parsed['status'].upper() == 'ERROR':
        print(f'Error {parsed["alert"]["message"]} downloading {local_filename}')
        return None
    
    with open(local_filename, 'wb') as f:
        f.write(resp.content)
    
    print(f'got {local_filename}')
    return local_filename

def get_bill_text_response_filename(state, bill_id):
    return f'tmp/bill_text_response_{state}_{bill_id}.json'

@legiscan_api
def get_bill_text(state, bill_id, legiscan_bill_id, bill_meta_path: str, api_key: str):
    local_filename = get_bill_text_response_filename(state, bill_id)

    if not bill_meta_path:
        print(f'Missing meta data {get_bill_meta_filename(state, bill_id)}')
        return None

    meta = None
    with open(bill_meta_path, 'r') as f:
        meta = json.load(f)

    texts = meta['bill']['texts']
    sorted_texts = sorted(texts, key=lambda x: x['date'], reverse=True)
    
    if len(sorted_texts) < 1:
        print(f'No bill texts available yet for {bill_meta_path}')
        return None
    
    doc_id = sorted_texts[0]['doc_id']

    if os.path.exists(local_filename):
        # print(f'skipping {local_filename}, exists')
        return local_filename

    assembled_url = f'https://api.legiscan.com/?key={api_key}&op=getBillText&id={doc_id}'
    resp = requests.get(assembled_url)

    if not resp.ok:
        print(f'Error {resp.status_code} downloading {local_filename}')
        return None
    
    parsed = json.loads(resp.text)
    if parsed['status'].upper() == 'ERROR':
        print(f'Error {parsed["alert"]["message"]} downloading {local_filename}')
        return None
    
    with open(local_filename, 'wb') as f:
        f.write(resp.content)
    
    print(f'got {local_filename}')
    return local_filename

def get_bill_contents_filename(state, bill_id, extension):
    return f'bills/{state}_{bill_id}.{extension}'

def extract_bill_contents(state, bill_id, legiscan_bill_id, _meta_path, response_path: str):
    result = None
    
    if not response_path:
        print(f'Missing response data {get_bill_text_response_filename(state, bill_id)}')
        return None
    
    with open(response_path, 'r') as f:
        result = json.load(f)['text']

    doc = result['doc']
    extension = result['mime'].split('/')[-1]
    local_filename = get_bill_contents_filename(state, bill_id, extension)

    with open(local_filename, 'wb') as f:
        f.write(base64.b64decode(doc))
    
    print(f'Created {local_filename}')
    return local_filename


In [2]:
def retrieve_all(filename: str):
    
    mapper = {}
    with open(filename, 'r') as f:
        mapper = json.load(f)

    tuples = chain.from_iterable(
        ((state, k, v) for k, v in m['bills'].items())
        for state, m 
        in mapper.items()
    )

    metas = [(*t, get_bill_meta(*t)) for t in tuples]
    responses = [(*meta, get_bill_text(*meta)) for meta in metas]
    extracts = [extract_bill_contents(*response) for response in responses]
    return len(extracts)

download_count = retrieve_all('resolver_map.json')
download_count

No bill texts available yet for tmp/bill_meta_SC_S0585.json
Created bills/AK_HB27.pdf
Created bills/AK_SB96.pdf
Created bills/AK_HB105.pdf
Created bills/AR_HB1156.pdf
Created bills/AR_HB1468.pdf
Created bills/AR_SB125.pdf
Created bills/AR_SB199.pdf
Created bills/AR_SB270.pdf
Created bills/AR_SB43.pdf
Created bills/AR_SB294.pdf
Created bills/AR_HB1615.pdf
Created bills/AZ_HB2312.html
Created bills/AZ_HB2517.html
Created bills/AZ_HB2711.html
Created bills/AZ_SB1001.html
Created bills/AZ_SB1026.html
Created bills/AZ_SB1028.html
Created bills/AZ_SB1030.html
Created bills/AZ_SB1040.html
Created bills/AZ_SB1417.html
Created bills/AZ_SB1697.html
Created bills/AZ_SB1698.html
Created bills/AZ_SB1700.html
Created bills/AZ_SB1702.html
Created bills/AZ_SB1694.html
Created bills/CA_AB1314.html
Created bills/CO_HB1092.pdf
Created bills/CO_HB1098.pdf
Created bills/CT_HB06213.pdf
Created bills/CT_SB00467.pdf
Created bills/CT_SB00468.pdf
Created bills/FL_H1421.pdf
Created bills/FL_H1521.pdf
Created bil

478