# Retrieving bill data from the LegiScan API

In [None]:
import json
import os
from pathlib import Path

import numpy as np
import pandas as pd
import requests
from IPython.display import Image
from legcop import LegiScan

## Retrieve a bill

You could do this the old-fashioned way:

In [None]:
# Call the LegiScan `getBill` operation directly over HTTP.
# The API key is read from the LEGISCAN_API_KEY environment variable.
r = requests.get(
    'https://api.legiscan.com/',
    params={
        'key': os.getenv('LEGISCAN_API_KEY'),
        'op': 'getBill',
        'state': 'US',
        'bill': 'HJR100',
    },
    # requests never times out by default; bound the wait so a stalled
    # connection cannot hang the notebook cell forever.
    timeout=30,
)

In [None]:
# Render the PNG inline as this cell's output (presumably a screenshot of the
# API's operations documentation, judging by the file path).
Image(filename='../01_inputs/documentation/operations.png')

Or you could use a preexisting Python wrapper:

In [None]:
# Build the LegiScan wrapper client; the key comes from the environment
# variable LEGISCAN_API_KEY (None when the variable is unset).
legis = LegiScan(apikey=os.environ.get('LEGISCAN_API_KEY'))

In [None]:
# Fetch bill HJR100 for state 'US' through the wrapper.
# NOTE: the variable is named `hr100` although the bill number is HJR100.
hr100 = legis.get_bill(state='US', bill_number='HJR100')

In [None]:
# Show the retrieved bill as this cell's output.
hr100

## Save bill object

In [None]:
def save_requested_bill(data: dict, to_csv: bool):
    """Save result returned by API to a CSV or JSON file.

    The file is named after the bill's ``bill_id`` and written under
    ``../04_outputs/csv`` or ``../04_outputs/json``. The target directory
    is created (including missing parents) when it does not exist yet.

    Parameters
    ----------
    data : dict
        Raw data returned by the LegiScan API. Must contain a ``bill_id``
        key; if it does not, a message listing the available keys is
        printed and nothing is written.

    to_csv : bool
        If True, flatten the data with ``pd.json_normalize`` and write it
        as CSV; otherwise dump the data unchanged as JSON.

    Returns
    -------
    None

    """
    try:
        bill_id = data['bill_id']
    except KeyError:
        # Without an id there is no filename to write to: report and stop
        # instead of falling through with `bill_id` unbound.
        keys = [str(key) for key in data]
        print(f'This data does not have a `bill_id`. Keys found: {", ".join(keys)}')
        return
    if to_csv:
        Path('../04_outputs/csv').mkdir(parents=True, exist_ok=True)
        filename = f'../04_outputs/csv/{bill_id}.csv'
        df = pd.json_normalize(data)
        df.to_csv(filename, index=False)
    else:
        Path('../04_outputs/json').mkdir(parents=True, exist_ok=True)
        filename = f'../04_outputs/json/{bill_id}.json'
        with open(filename, 'w') as fp:
            json.dump(data, fp)
    print(f'Bill saved to {filename}')

## Parse bill object

In [None]:
def load_bill_json(bill_id: int, parsed: bool):
    """Read a saved bill back from disk.

    Parameters
    ----------
    bill_id : int
        Identifier used as the file stem (``<bill_id>.json``).

    parsed : bool
        If True, read from ``../04_outputs/parsed/bills``; otherwise read
        from ``../04_outputs/json``.

    Returns
    -------
    The decoded JSON content of the file.

    """
    folder = '../04_outputs/parsed/bills' if parsed else '../04_outputs/json'
    with open(f'{folder}/{bill_id}.json') as handle:
        return json.load(handle)

### Nested

In [None]:
def determine_nested_data(bill_obj: dict):
    """Helper function for save_nested_data().

    Find the fields of ``bill_obj`` that hold nested data.

    Parameters
    ----------
    bill_obj : dict
        A bill dictionary. It is modified in place: every value that is an
        empty list or empty dict is replaced with ``''``.

    Returns
    -------
    list
        Keys whose values are non-empty lists or dicts, in the dict's
        iteration order.

    """
    nested_fields = []
    for key, value in bill_obj.items():
        if not isinstance(value, (list, dict)):
            continue
        if value:
            nested_fields.append(key)
        else:
            # Reassigning an existing key does not resize the dict, so this
            # is safe while iterating over it.
            bill_obj[key] = ''
    return nested_fields

In [None]:
def save_nested_data(bill_obj: dict):
    """Helper function for parse_bill_json.

    Split a bill into one JSON file per nested field plus one file for the
    remaining bill.

    Each field reported by ``determine_nested_data`` is removed from
    ``bill_obj`` and written to ``../04_outputs/parsed/<field>/<bill_id>.json``.
    What is left of the bill is written to
    ``../04_outputs/parsed/bills/<bill_id>.json``. Output directories are
    created as needed.

    Parameters
    ----------
    bill_obj : dict
        A bill dictionary containing a ``bill_id`` key. It is modified in
        place (nested fields are popped).

    Returns
    -------
    dict
        The same ``bill_obj``, now without its nested fields.

    """
    bill_id = bill_obj['bill_id']
    for field in determine_nested_data(bill_obj):
        field_dir = Path(f'../04_outputs/parsed/{field}/')
        field_dir.mkdir(parents=True, exist_ok=True)
        nested_filename = f'../04_outputs/parsed/{field}/{bill_id}.json'
        with open(nested_filename, 'w') as fp:
            json.dump(bill_obj.pop(field), fp)
    # parents=True: a bill without nested fields reaches this point before
    # any '../04_outputs/parsed' directory has been created.
    Path('../04_outputs/parsed/bills/').mkdir(parents=True, exist_ok=True)
    with open(f'../04_outputs/parsed/bills/{bill_id}.json', 'w') as fp:
        json.dump(bill_obj, fp)
    return bill_obj

In [None]:
def parse_bill_json(bill_id: int):
    """Load a raw saved bill, split out its nested fields, and return it.

    Parameters
    ----------
    bill_id : int
        Identifier of a bill previously saved under ``../04_outputs/json``.

    Returns
    -------
    dict
        The bill after ``save_nested_data`` has processed it.

    """
    bill = load_bill_json(bill_id, parsed=False)
    # save_nested_data works on `bill` in place, so return the object itself
    # rather than relying on the helper's return value.
    save_nested_data(bill)
    return bill

In [None]:
# Write the `hr100` bill to ../04_outputs/csv/<bill_id>.csv.
save_requested_bill(hr100, to_csv=True)

## Multiple bills

In [None]:
def prepare_bill(file_path: str):
    """Read one bill JSON file and flatten it with ``pd.json_normalize``.

    Parameters
    ----------
    file_path : str
        Path of the JSON file to read.

    Returns
    -------
    pandas.DataFrame
        The normalized content of the file.

    """
    with open(file_path) as handle:
        return pd.json_normalize(json.load(handle))

In [None]:
# Materialize and sort the matches: a bare glob() is a one-shot generator in
# arbitrary order, so re-running a later cell would see it exhausted and the
# combined output's row order would not be reproducible.
files = sorted(Path('../04_outputs/json/US_117th_congress_bills/').glob('*.json'))

In [None]:
# One DataFrame per bill file, in the order `files` yields them.
dataframes = list(map(prepare_bill, files))

In [None]:
# Stack the per-bill frames into one table with a fresh 0..n-1 index.
df = pd.concat(objs=dataframes, ignore_index=True)

In [None]:
# Write the combined table to CSV without the index column.
df.to_csv(path_or_buf='../04_outputs/csv/US_117th_congress.csv', index=False)