In [1]:
import base64
from functools import wraps
import json
import os
import pandas as pd
import requests

def legiscan_api(api_action):
    """Decorator that supplies the LegiScan API key to ``api_action``.

    The wrapped function must accept an ``api_key`` keyword argument. If the
    caller passes a truthy ``api_key`` it is forwarded unchanged; otherwise
    the value of the ``LEGISCAN_API_KEY`` environment variable is injected
    (``None`` when the variable is not set).

    The environment is consulted on every call rather than once at decoration
    time, so a key exported after this cell has run (e.g. via ``%env`` in a
    later cell) takes effect without re-defining the decorated functions.
    """

    @wraps(api_action)
    def api_wrapper(*args, **kwargs):
        # `kwargs` is a fresh dict for each call, so filling it in place
        # cannot leak into the caller's own data.
        if not kwargs.get('api_key'):
            kwargs['api_key'] = os.environ.get("LEGISCAN_API_KEY")
        return api_action(*args, **kwargs)

    return api_wrapper

def get_bill_contents_filename(row, extension):
    """Return the path under ``bills/`` for a bill's decoded document.

    ``row`` is indexed by ``'state'`` and ``'billId'``; every space in the
    bill id is turned into an underscore. The result has the form
    ``bills/<state>_<bill id>.<extension>``.
    """
    bill_slug = row['billId'].replace(' ', '_')
    return 'bills/{}_{}.{}'.format(row['state'], bill_slug, extension)

def get_bill_text_response_filename(row):
    """Return the path under ``tmp/`` for a bill's saved bill-text response.

    ``row`` is indexed by ``'state'`` and ``'billId'``; every space in the
    bill id is turned into an underscore. The result has the form
    ``tmp/bill_text_response_<state>_<bill id>.json``.
    """
    bill_slug = row['billId'].replace(' ', '_')
    return 'tmp/bill_text_response_{}_{}.json'.format(row['state'], bill_slug)

def get_bill_meta_filename(row):
    """Return the path under ``tmp/`` for a bill's saved metadata response.

    ``row`` is indexed by ``'state'`` and ``'billId'``; every space in the
    bill id is turned into an underscore. The result has the form
    ``tmp/bill_meta_<state>_<bill id>.json``.
    """
    bill_slug = row['billId'].replace(' ', '_')
    return 'tmp/bill_meta_{}_{}.json'.format(row['state'], bill_slug)

@legiscan_api
def get_bill_meta(row, api_key: str):
    """Fetch the LegiScan ``getBill`` response for one bill and save it to disk.

    Args:
        row: Record indexed by ``'legiscanId'`` (the id sent to the API) plus
            the keys ``get_bill_meta_filename`` needs to build the local path.
        api_key: LegiScan API key; filled in by ``@legiscan_api`` when the
            caller omits it.

    Returns:
        Path of the saved JSON response, or ``None`` when the download failed
        (network error, non-2xx status, unparsable body, or an API-level
        ``ERROR`` status). Failures are reported with ``print``.

    A file already present at the target path is reused without contacting
    the API.
    """
    local_filename = get_bill_meta_filename(row)

    # Already saved by a previous run: skip the network round trip.
    if os.path.exists(local_filename):
        return local_filename

    try:
        resp = requests.get(
            'https://api.legiscan.com/',
            params={'key': api_key, 'op': 'getBill', 'id': row['legiscanId']},
            # Without a timeout a single stalled connection blocks the whole run.
            timeout=30,
        )
    except requests.RequestException as exc:
        # Only the exception type is printed: the full message can embed the
        # request URL, which contains the API key.
        print(f'Error {type(exc).__name__} downloading {local_filename}')
        return None

    if not resp.ok:
        print(f'Error {resp.status_code} downloading {local_filename}')
        return None

    try:
        parsed = json.loads(resp.text)
    except ValueError:
        print(f'Error invalid JSON downloading {local_filename}')
        return None

    # The API can answer HTTP 200 while reporting a failure in the payload.
    if parsed['status'].upper() == 'ERROR':
        print(f'Error {parsed["alert"]["message"]} downloading {local_filename}')
        return None

    # Create the target directory on first use so a fresh checkout works.
    target_dir = os.path.dirname(local_filename)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    with open(local_filename, 'wb') as f:
        f.write(resp.content)

    print(f'got {local_filename}')
    return local_filename

@legiscan_api
def get_bill_text(row, bill_meta_path: str, api_key: str):
    """Fetch the LegiScan ``getBillText`` response for a bill's latest text.

    Args:
        row: Record providing the keys the filename helpers need.
        bill_meta_path: Path to the saved ``getBill`` JSON for this bill, or a
            falsy value when that download failed.
        api_key: LegiScan API key; filled in by ``@legiscan_api`` when the
            caller omits it.

    Returns:
        Path of the saved JSON response, or ``None`` when the metadata is
        missing, the bill has no texts, or the download failed (network
        error, non-2xx status, unparsable body, or an API-level ``ERROR``
        status). Failures are reported with ``print``.

    NOTE(review): the saved file is named after the bill, not the document
    id, so once a response exists a text version published later is not
    downloaded.
    """
    local_filename = get_bill_text_response_filename(row)

    if not bill_meta_path:
        print(f'Missing meta data {get_bill_meta_filename(row)}')
        return None

    with open(bill_meta_path, 'r') as f:
        meta = json.load(f)

    texts = meta['bill']['texts']
    if not texts:
        print(f'No bill texts available yet for {bill_meta_path}')
        return None

    # Entry with the greatest 'date'; on ties the first listed entry wins,
    # which matches a stable descending sort followed by taking element 0.
    doc_id = max(texts, key=lambda text: text['date'])['doc_id']

    # Already saved by a previous run: skip the network round trip.
    if os.path.exists(local_filename):
        return local_filename

    try:
        resp = requests.get(
            'https://api.legiscan.com/',
            params={'key': api_key, 'op': 'getBillText', 'id': doc_id},
            # Without a timeout a single stalled connection blocks the whole run.
            timeout=30,
        )
    except requests.RequestException as exc:
        # Only the exception type is printed: the full message can embed the
        # request URL, which contains the API key.
        print(f'Error {type(exc).__name__} downloading {local_filename}')
        return None

    if not resp.ok:
        print(f'Error {resp.status_code} downloading {local_filename}')
        return None

    try:
        parsed = json.loads(resp.text)
    except ValueError:
        print(f'Error invalid JSON downloading {local_filename}')
        return None

    # The API can answer HTTP 200 while reporting a failure in the payload.
    if parsed['status'].upper() == 'ERROR':
        print(f'Error {parsed["alert"]["message"]} downloading {local_filename}')
        return None

    # Create the target directory on first use so a fresh checkout works.
    target_dir = os.path.dirname(local_filename)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    with open(local_filename, 'wb') as f:
        f.write(resp.content)

    print(f'got {local_filename}')
    return local_filename

def extract_bill_contents(row, response_path: str):
    """Decode the base64 document in a saved bill-text response and write it out.

    Args:
        row: Record providing the keys the filename helpers need.
        response_path: Path to the saved ``getBillText`` JSON, or a falsy
            value when that download failed.

    Returns:
        Path of the written document, or ``None`` (after printing a message)
        when ``response_path`` is falsy. The file extension is the part of the
        response's ``mime`` value after the last ``/``.

    The document is rewritten on every call, even if it already exists.
    """
    if not response_path:
        print(f'Missing response data {get_bill_text_response_filename(row)}')
        return None

    with open(response_path, 'r') as f:
        text_entry = json.load(f)['text']

    encoded_doc = text_entry['doc']
    extension = text_entry['mime'].split('/')[-1]
    local_filename = get_bill_contents_filename(row, extension)

    # Create the output directory on first use so a fresh checkout works.
    target_dir = os.path.dirname(local_filename)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    with open(local_filename, 'wb') as f:
        f.write(base64.b64decode(encoded_doc))

    return local_filename


In [2]:
def retrieve_all(filename: str):
    """Download metadata and text for every bill in ``filename`` and extract the documents.

    ``filename`` is a JSON file readable by ``pandas.read_json``, with one row
    per bill. The work runs in three passes over all rows: fetch metadata,
    fetch the latest bill text, then decode the document. A failure in an
    earlier pass shows up as ``None`` in the later ones for that row.

    Returns:
        The number of bills whose document was actually written. Rows that
        failed at any stage are not counted, so the result can be smaller
        than the number of rows in the file.
    """
    bills = pd.read_json(filename)

    metas = [(row, get_bill_meta(row)) for _, row in bills.iterrows()]
    responses = [(row, get_bill_text(row, meta_path)) for row, meta_path in metas]
    extracts = [
        extract_bill_contents(row, response_path)
        for row, response_path in responses
    ]

    # Failed rows yield None; counting them would always give len(bills).
    return sum(1 for path in extracts if path is not None)

# Run the whole pipeline over every row of the tracking file. The bare name on
# the last line makes the notebook display the value retrieve_all returned.
download_count = retrieve_all('tracktranslegislation.json')
download_count

got tmp/bill_meta_AK_HB27.json
got tmp/bill_meta_AR_HB1156.json
got tmp/bill_meta_AR_HB1468.json
got tmp/bill_meta_AR_SB125.json
got tmp/bill_meta_AR_SB199.json
got tmp/bill_meta_AR_SB270.json
got tmp/bill_meta_AR_SB43.json
got tmp/bill_meta_AZ_HB2312.json
got tmp/bill_meta_AZ_HB2517.json
got tmp/bill_meta_AZ_HB2711.json
got tmp/bill_meta_AZ_SB1001.json
got tmp/bill_meta_AZ_SB1026.json
got tmp/bill_meta_AZ_SB1028.json
got tmp/bill_meta_AZ_SB1030.json
got tmp/bill_meta_AZ_SB1040.json
got tmp/bill_meta_AZ_SB1417.json
got tmp/bill_meta_AZ_SB1697.json
got tmp/bill_meta_AZ_SB1698.json
got tmp/bill_meta_AZ_SB1700.json
got tmp/bill_meta_AZ_SB1702.json
got tmp/bill_meta_CA_AB1314.json
got tmp/bill_meta_CO_HB1092.json
got tmp/bill_meta_CO_HB1098.json
got tmp/bill_meta_CT_HB06213.json
got tmp/bill_meta_CT_SB00467.json
got tmp/bill_meta_CT_SB00468.json
got tmp/bill_meta_FL_H1421.json
got tmp/bill_meta_FL_H1521.json
got tmp/bill_meta_FL_S0254.json
got tmp/bill_meta_FL_S1320.json
got tmp/bill_meta_

372