In [8]:
import os
from datetime import datetime

import pandas as pd
import requests

In [9]:
# API key for the NY Senate Open Legislation API.
# The key is read from the NYSENATE_API_KEY environment variable so that no
# credential is stored in the notebook. A key that was previously committed in
# this cell should be treated as leaked and rotated.
api_key = os.environ.get("NYSENATE_API_KEY", "")
if not api_key:
    print("Warning: NYSENATE_API_KEY is not set; requests will be sent without an API key.")

# Base URL for the API
base_url = "https://legislation.nysenate.gov/api/3/bills/"

# Mapping of tracker (spreadsheet) column names to keys of the flattened bill
# JSON produced by flatten_json. A value of None marks a column that has no
# source field: it is either computed ("Last Updated") or left blank.
column_mappings = {
    "Last Updated": None,  # date tracker is exported
    "Session": "session",  # session that bill was introduced
    "Chamber": "billType_desc",  # chamber that bill was introduced in
    "Bill Number": "basePrintNo",  # bill number
    "Date Introed": "publishedDateTime",  # this date will be reformatted
    "Automated Status": "status_statusDesc",  # status of the bill
    "Sponsor": "sponsor_member_fullName",
    # Source path: result.amendments.items..coSponsors.size
    # NOTE(review): that path has an empty-string key between "items" and
    # "coSponsors"; flatten_json would render it with a double underscore
    # ("amendments_items__coSponsors_size"), so this key may never match.
    # Confirm against a real API response before changing it.
    "Co Sponsor Count": "amendments_items_coSponsors_size",
    "Co Sponsor Count Change": None,  # placeholder
    "Bill Last Changed": "status_actionDate",
    "Summary": "summary",
    "Committee": "status_committeeName",  # committee name
    "Co Sponsors": None,  # number of co-sponsors. if none should be 0
    "Notes": None,  # placeholder
    # Source path: result.amendments.items..relatedLaws.items.AMEND.items
    # NOTE(review): same empty-key concern as "Co Sponsor Count" above.
    "Laws Impacted": "amendments_items_relatedLaws_items_AMEND_items",
    "Bill Link": None,  # add the link to the bill url
    "Priority Level": None  # placeholder
}

In [10]:
def _bill_in_committees(bill, committees):
    """Return True if the bill's status or any of its milestones names one of `committees`."""
    # `or {}` / `or []` guard against keys that are present but null in the JSON.
    status = bill.get("status") or {}
    if status.get("committeeName") in committees:
        return True
    milestones = (bill.get("milestones") or {}).get("items") or []
    return any((milestone or {}).get("committeeName") in committees for milestone in milestones)


def fetch_bills_by_year(api_key, session_year, committee_filter=None, page_size=50, timeout=30):
    """
    Fetch all the bills for a given session year from the NY Senate API,
    with an optional committee filter applied after everything is fetched.

    The Open Legislation API paginates with `limit` / `offset` query
    parameters (offset is 1-based), so results are walked by offset rather
    than by a page number.

    Args:
        api_key: API key sent as the `key` query parameter.
        session_year: Session year appended to the module-level `base_url`.
        committee_filter: Optional committee name, or iterable of names. A bill
            is kept when its status committee or any milestone committee is
            one of them. A falsy value disables filtering.
        page_size: Number of bills requested per call.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of bill dicts. If a request fails part-way through, the bills
        collected so far are returned and the error is printed.
    """
    url = f"{base_url}{session_year}"
    offset = 1  # the API's offset is 1-based
    page = 1    # only used for progress messages
    all_bills = []

    while True:
        # Let requests build and encode the query string.
        params = {"key": api_key, "limit": page_size, "offset": offset}

        try:
            response = requests.get(url, params=params, timeout=timeout)
        except requests.exceptions.RequestException as e:
            print(f"Error fetching data: {e}")
            break

        # Check if the request was successful (status code 200)
        if response.status_code != 200:
            print(f"Failed to retrieve bills. Status code: {response.status_code}")
            print(f"Response: {response.text}")
            break

        try:
            bill_data = response.json()
        except ValueError as e:
            # Covers both the stdlib and requests' JSON decode errors.
            print(f"Error fetching data: {e}")
            break

        if not isinstance(bill_data, dict):
            bill_data = {}
        bills = (bill_data.get("result") or {}).get("items") or []

        if not bills:
            # An empty page after earlier successful pages just means we are done.
            if not all_bills:
                print(f"No bills found for session year {session_year}.")
            break

        print(f"Fetched {len(bills)} bills from page {page}.")
        all_bills.extend(bills)

        # Stop on a short page, or once the reported total (if any) is reached.
        total = bill_data.get("total")
        if len(bills) < page_size:
            break
        if isinstance(total, int) and len(all_bills) >= total:
            break

        offset += len(bills)
        page += 1

    # Apply committee filter after fetching all the bills
    if committee_filter:
        # A bare string is treated as a single committee name, not as a
        # sequence of characters to substring-match against.
        if isinstance(committee_filter, str):
            committees = [committee_filter]
        else:
            committees = list(committee_filter)
        all_bills = [bill for bill in all_bills if _bill_in_committees(bill, committees)]

    return all_bills

In [11]:
def flatten_json(nested_json, parent_key='', sep='_'):
    """
    Flatten a nested JSON structure into a flat dictionary.

    Dict keys and list indices along the path to each leaf are joined with
    `sep` to form the flat key, e.g. {"a": {"b": [10, {"c": 1}]}} becomes
    {"a_b_0": 10, "a_b_1_c": 1}.

    Args:
        nested_json: The decoded JSON value to flatten (normally a dict).
        parent_key: Prefix prepended to every generated key.
        sep: Separator placed between path components.

    Returns:
        A dict mapping flattened keys to leaf values. Empty dicts and empty
        lists contribute no keys. If two paths produce the same flat key,
        the one visited last wins.
    """
    flat = {}

    def _walk(value, key):
        if isinstance(value, dict):
            for child_key, child in value.items():
                _walk(child, f"{key}{sep}{child_key}" if key else child_key)
        elif isinstance(value, list):
            # List elements may be dicts, nested lists or plain scalars;
            # all are handled by recursing on the element itself.
            for index, child in enumerate(value):
                _walk(child, f"{key}{sep}{index}" if key else str(index))
        else:
            flat[key] = value

    _walk(nested_json, parent_key)
    return flat

In [12]:
def get_column_data(item, column_mappings):
    """
    Create one row of tracker data from a flattened bill.

    Args:
        item: Flat dict for one bill (as produced by flatten_json).
        column_mappings: Dict mapping output column name to a key of `item`,
            or to None for columns that have no source field.

    Returns:
        A dict with one entry per column in `column_mappings`, in the same
        order. Mapped columns take the value found in `item` ("" when the key
        is absent). "Date Introed" is converted to a `date` when the value
        parses as an ISO timestamp and is otherwise left as-is. Unmapped
        columns are "" except "Last Updated", which is today's date as
        'YYYY-MM-DD'.
    """
    # Computed once so every column of the row sees the same export date.
    today = datetime.now().strftime("%Y-%m-%d")
    row_data = {}

    for column, key in column_mappings.items():
        if not key:
            # Columns without a source field are computed or filled manually.
            row_data[column] = today if column == "Last Updated" else ""
            continue

        value = item.get(key, "")

        if column == "Date Introed" and value:
            try:
                # Keep only the date part of the timestamp.
                value = datetime.fromisoformat(value).date()
            except (TypeError, ValueError):
                # Not an ISO string (or not a string at all): keep the raw value
                # rather than aborting the whole export.
                pass

        row_data[column] = value

    return row_data

In [13]:
def export_to_excel(bill_data, filename="NY_Senate_bills_data"):
    """
    Write the given bills to a timestamped Excel workbook.

    Each bill is flattened with flatten_json and reduced to the tracker
    columns via get_column_data and the module-level column_mappings. The
    output file is named "<filename>_<YYYYmmdd_HHMMSS>.xlsx". Nothing is
    written when there are no rows.
    """
    # One tracker row per bill, in input order.
    rows = [get_column_data(flatten_json(bill), column_mappings) for bill in bill_data]
    frame = pd.DataFrame(rows)

    # Nothing to write: report and bail out early.
    if frame.empty:
        print("No data to export to Excel.")
        return

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_name = f"{filename}_{stamp}.xlsx"
    frame.to_excel(output_name, index=False)
    print(f"Data exported to {output_name}")

In [14]:
def main():
    """Fetch the configured session's bills for the chosen committees and export them to Excel."""
    # Session year to pull bills for.
    year = 2025

    # Committees to keep (e.g. 'Education', 'Judiciary').
    committees = ['Elections']

    bills = fetch_bills_by_year(api_key, year, committees)

    if not bills:
        print("No data retrieved from the API.")
        return

    # Export the filtered and flattened data to Excel
    export_to_excel(bills)

if __name__ == "__main__":
    main()

Fetched 50 bills from page 1.
Fetched 50 bills from page 2.
Fetched 50 bills from page 3.
Fetched 50 bills from page 4.
Fetched 50 bills from page 5.
Fetched 50 bills from page 6.
Fetched 50 bills from page 7.
Fetched 50 bills from page 8.
Fetched 50 bills from page 9.
Fetched 50 bills from page 10.
Fetched 50 bills from page 11.
Fetched 50 bills from page 12.
Fetched 50 bills from page 13.
Fetched 50 bills from page 14.
Fetched 50 bills from page 15.
Fetched 50 bills from page 16.
Fetched 50 bills from page 17.
Fetched 50 bills from page 18.
Fetched 50 bills from page 19.
Fetched 50 bills from page 20.
Fetched 50 bills from page 21.
Fetched 50 bills from page 22.
Fetched 50 bills from page 23.
Fetched 50 bills from page 24.
Fetched 50 bills from page 25.
Fetched 50 bills from page 26.
Fetched 50 bills from page 27.
Fetched 50 bills from page 28.
Fetched 50 bills from page 29.
Fetched 50 bills from page 30.
Fetched 50 bills from page 31.
Fetched 50 bills from page 32.
Fetched 50 bills 