In [None]:
from smolagents import CodeAgent, DuckDuckGoSearchTool, OpenAIServerModel
import os
model = OpenAIServerModel(
    model_id="gpt-4o-mini",
    api_key=os.environ["OPENAI_API_KEY"],
)
agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model)
agent.run("Who is Brandon Eychaner?")

In [1]:
"""
    CDG Client - An example client for the Congress.gov API.

    @copyright: 2022, Library of Congress
    @license: CC0 1.0
"""
from urllib.parse import urljoin
import requests
import logging 

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


API_VERSION = "v3"
ROOT_URL = "https://api.congress.gov/"
RESPONSE_FORMAT = "json"


class _MethodWrapper:
    """ Wrap request method to facilitate queries.  Supports requests signature. """

    def __init__(self, parent, http_method):
        self._parent = parent
        self._method = getattr(parent._session, http_method)

    def __call__(self, endpoint, *args, **kwargs):  # full signature passed here
        response = self._method(
            urljoin(self._parent.base_url, endpoint), *args, **kwargs
        )
        if response.headers.get("content-type", "").startswith("application/json"):
            return response.json()
        else:
            return response.content

class CDGClient:
    """ A sample client to interface with Congress.gov. """

    def __init__(
        self,
        api_key,
        api_version=API_VERSION,
        response_format=RESPONSE_FORMAT,
        raise_on_error=True,
        added_headers=None
    ):
        self.base_url = urljoin(ROOT_URL, api_version) + "/"
        self._session = requests.Session()

        # do not use url parameters, even if offered, use headers
        self._session.params = {"format": response_format}
        self._session.headers.update({"x-api-key": api_key})
        if added_headers:
            self._session.headers.update(added_headers)

        if raise_on_error:
            self._session.hooks = {
                "response": lambda r, *args, **kwargs: r.raise_for_status()
            }

    def __getattr__(self, method_name):
        """Find the session method dynamically and cache for later."""
        method = _MethodWrapper(self, method_name)
        self.__dict__[method_name] = method
        return method

In [77]:
from datetime import datetime
from enum import StrEnum
from pydantic import BaseModel, HttpUrl, Field, field_validator
from typing import List, Annotated, Optional

class Format(BaseModel):
    type: str
    url: HttpUrl

class TextVersion(BaseModel):
    date: datetime
    formats: List[Format]
    type: str

class PolicyArea(BaseModel):
    name: str
    update_date: Annotated[datetime, Field(alias='updateDate')] = None

class LegislativeSubject(BaseModel):
    name: str
    update_date: Annotated[datetime, Field(alias='updateDate')] = None

class Chamber(StrEnum):
    HOUSE = "House"
    SENATE = "Senate"

class LawType(StrEnum):
    PUBLIC = "Public Law"
    PRIVATE = "Private Law"

    def __init__(self, value):
        self._value_ = value
        if value == "Public Law":
            self.type_url = "pub"
        elif value == "Private Law":
            self.type_url = "priv"

class LatestAction(BaseModel):
    action_date: Annotated[datetime, Field(alias="actionDate")]
    text: str

class Note(BaseModel):
    texts: Annotated[List[str], Field(alias='text')]
    text: str = ""
    def __init__(self, text: str):
        self.text = ["\n".join(x["text"]) for x in text]

class Summary(BaseModel):
    action_date: Annotated[datetime, Field(alias='actionDate')]
    action_desc: Annotated[str, Field(alias='actionDesc')]
    text: str
    updateDate: Annotated[datetime, Field(alias='updateDate')]
    version_code: Annotated[str, Field(alias='versionCode')]

class CommitteeMetadata(BaseModel):
    name: str
    system_code: Annotated[str, Field(alias='systemCode')]
    url: HttpUrl
    
class SourceSystem(BaseModel):
    name: str

class Activity(BaseModel):
    date: datetime
    name: str

class IdentifyingEntity(StrEnum):
    HOUSE = "House"
    SENATE = "Senate"
    CRS = "CRS" # Congressional Research Service

class RelationshipDetail(BaseModel):
    identified_by: Annotated[IdentifyingEntity, Field(alias='identifiedBy')] = None
    type: str

class CountUrl(BaseModel):
    count: int
    url: HttpUrl

class Title(BaseModel):
    title: str
    title_type: Annotated[str, Field(alias='titleType')]
    title_type_code: Annotated[int, Field(alias='titleTypeCode')]
    update_date: Annotated[datetime, Field(alias='updateDate')]
    bill_text_version_code: Annotated[str, Field(alias='billTextVersionCode')] = ""
    bill_text_version_name: Annotated[str, Field(alias='billTextVersionName')] = ""

class ChamberCode(StrEnum):
    house = "H"
    senate = "S"

class BillType(StrEnum):
    def __init__(self, value):
        self._value_ = value
        self.type_url = value.lower()
    
    HR = "HR" # Bill introduced in House
    S = "S" # Bill introduced in Senate
    HJRES = "HJRES" # Joint resolution introduced in House
    SJRES = "SJRES" # Joint resolution introduced in Senate
    HCONRES = "HCONRES" # Concurrent resolution introduced in House
    SCONRES = "SCONRES" # Concurrent resolution introduced in Senate
    HRES = "HRES" # Simple resolution introduced in House
    SRES = "SRES" # Simple resolution introduced in Senate

class Member(BaseModel):
    bioguide_id: Annotated[str, Field(alias='bioguideId')]
    firstName: str
    first_name: Annotated[str, Field(alias='firstName')]
    full_name: Annotated[str, Field(alias='fullName')]
    last_name: Annotated[str, Field(alias='lastName')]
    party: str
    state: str
    url: HttpUrl
    middle_name: Annotated[str, Field(alias='middleName')] = ""
    district: Optional[int] = None
    is_original_cosponsor: Annotated[bool, Field(alias='isOriginalCosponsor')] = False
    is_by_request: Annotated[str, Field(alias='isByRequest')] = ""

class LawMetadata(BaseModel):
    number: str
    law_type: Annotated[LawType, Field(alias="type")]

    @field_validator('law_type', mode='before')
    def convert_law_type(cls, value):
        if value == "Public Law":
            return LawType.PUBLIC
        elif value == "Private Law":
            return LawType.PRIVATE
        raise ValueError("Invalid law type")

class Law(BaseModel):
    congress: int
    latest_action: Annotated[LatestAction, Field(alias="latestAction")]
    laws: List[LawMetadata]
    number: str
    origin_chamber: Annotated[Chamber, Field(alias="originChamber")]
    origin_chamber_code: Annotated[ChamberCode, Field(alias="originChamberCode")]
    title: str
    bill_type: Annotated[BillType, Field(alias="type")]
    update_date: Annotated[datetime, Field(alias="updateDate")]
    update_date_including_text: Annotated[datetime, Field(alias="updateDateIncludingText")]
    url: HttpUrl

class Committee(BaseModel):
    activities: List[Activity]
    chamber: str
    name: str
    system_code: Annotated[str, Field(alias='systemCode')]
    type: str
    url: HttpUrl

class Action(BaseModel):
    action_date: Annotated[datetime, Field(alias='actionDate')] = None
    committees: Optional[List[CommitteeMetadata]] = []
    source_system: Annotated[SourceSystem, Field(alias='sourceSystem')]
    text: str
    type: str
    action_code: Annotated[str, Field(alias='actionCode')] = ""
    action_time: Annotated[datetime, Field(alias='actionTime')] = None

class Amendment(BaseModel):
    congress: int
    latest_action: Annotated[LatestAction, Field(alias='latestAction')]
    number: str
    purpose: str
    type: str
    update_date: Annotated[datetime, Field(alias='updateDate')]
    url: HttpUrl

class BillMetadata(BaseModel):
    congress: int
    latest_action: Annotated[LatestAction, Field(alias="latestAction")]
    number: int
    relationship_details: Annotated[List[RelationshipDetail], Field(alias="relationshipDetails")] = []
    title: str
    type: str
    url: HttpUrl

class Subjects(BaseModel):
    legislative_subjects: Optional[List[LegislativeSubject]] = []
    policy_area: Annotated[PolicyArea, Field(alias='policyArea')]

class Bill(BaseModel):
    congress: int
    constitutional_authority_statement_text: Annotated[str, Field(alias="constitutionalAuthorityStatement")] = ""
    introduced_date: Annotated[datetime, Field(alias="introducedDate")] = None
    latest_action: Annotated[LatestAction, Field(alias="latestAction")]
    laws: Annotated[List[LawMetadata], Field(alias="laws")] = []
    number: str
    origin_chamber: Annotated[Chamber, Field(alias="originChamber")]
    origin_chamber_code: Annotated[ChamberCode, Field(alias="originChamberCode")]
    policy_area: Annotated[PolicyArea, Field(alias="policyArea")] = None
    sponsors: List[Member] = []
    title: str
    type: str
    update_date: Annotated[datetime, Field(alias="updateDate")]
    update_date_including_text: Annotated[datetime, Field(alias="updateDateIncludingText")]
    notes: Annotated[Note, Field(alias="notes")] = None


In [9]:
def retrieve_laws(start_date: str, end_date: str, congress: int, offset: int) -> dict:
    """
    Retrieve the details of a specific law.

    Args:
        start_date (str): The start date for the search in the format "YYYY-MM-DDTHH:MM:SSZ".
        end_date (str): The end date for the search in the format "YYYY-MM-DDTHH:MM:SSZ".
    Returns:
        dict: A dictionary of the law details.
    """
    params = {
        "api_key": congress_api_key,
        "format": RESPONSE_FORMAT,
        "fromDateTime": start_date,
        "toDateTime": end_date
    }
    API_HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:39.0)',
        'accept': 'application/xml',
        "x-api-key": congress_api_key}
    
    if offset > 0:
        params["offset"] = offset
    
    response = requests.get(f"{BASE_URL}law/{congress}", headers=API_HEADERS, params=params)
    if response.status_code == 200:
        data = response.json()
        return data
    else:
        response.raise_for_status()

laws = retrieve_laws("2024-02-15T00:00:00Z", "2024-03-30T00:00:00Z", 118, 0)

In [None]:
def retrieve_specific_law(congress: int, law_type: str, law_number: int) -> dict:
    """
    Retrieve the details of a specific law.

    Args:
        congress (int): The Congress number.
        law_type (str): The type of law; may be "pub" or "priv".
        law_number (int): The law number.
    Returns:
        dict: A dictionary of the law details.
    """
    params = {
        "api_key": congress_api_key,
        "format": RESPONSE_FORMAT
    }
    API_HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:39.0)',
        'accept': 'application/xml',
        #"x-api-key": congress_api_key
        }
    
    response = requests.get(f"{BASE_URL}law/{congress}/{law_type}/{law_number}", headers=API_HEADERS, params=params)
    if response.status_code == 200:
        data = response.json()
        return data
    else:
        response.raise_for_status()

retrieve_specific_law("118", "hr", '5103')

In [11]:
client = CDGClient(api_key=os.environ["CONGRESS_API_KEY"], response_format=RESPONSE_FORMAT)

In [24]:
client = CDGClient(api_key=os.environ["CONGRESS_API_KEY"])
client.get("law/118/pub")

{'bills': [{'congress': 118,
   'latestAction': {'actionDate': '2025-01-04',
    'text': 'Became Public Law No: 118-229.'},
   'laws': [{'number': '118-229', 'type': 'Public Law'}],
   'number': '5103',
   'originChamber': 'House',
   'originChamberCode': 'H',
   'title': 'FISHES Act',
   'type': 'HR',
   'updateDate': '2025-02-01',
   'updateDateIncludingText': '2025-02-01',
   'url': 'https://api.congress.gov/v3/bill/118/hr/5103?format=json'},
  {'congress': 118,
   'latestAction': {'actionDate': '2025-01-05',
    'text': 'Became Public Law No: 118-273.'},
   'laws': [{'number': '118-273', 'type': 'Public Law'}],
   'number': '82',
   'originChamber': 'House',
   'originChamberCode': 'H',
   'title': 'Social Security Fairness Act of 2023',
   'type': 'HR',
   'updateDate': '2025-01-31',
   'updateDateIncludingText': '2025-01-31',
   'url': 'https://api.congress.gov/v3/bill/118/hr/82?format=json'},
  {'congress': 118,
   'latestAction': {'actionDate': '2024-12-12',
    'text': 'Became

In [85]:
from urllib.parse import urljoin, urlparse, parse_qs
import requests
import logging
import os
import time
from tqdm import tqdm

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

API_VERSION = "v3"
ROOT_URL = "https://api.congress.gov/"
RESPONSE_FORMAT = "json"
RESULT_LIMIT = 250
RATE_LIMIT_CONSTANT = 5000 / 60 / 60  # 5000 requests per hour

class _MethodWrapper:
    """ Wrap request method to facilitate queries.  Supports requests signature. """

    def __init__(self, parent, http_method):
        self._parent = parent
        self._method = getattr(parent._session, http_method)

    def __call__(self, endpoint, *args, **kwargs):  # full signature passed here
        response = self._method(
            urljoin(self._parent.base_url, endpoint), *args, **kwargs
        )
        if response.headers.get("content-type", "").startswith("application/json"):
            return response.json()
        else:
            return response.content

class CDGClient:
    """ A sample client to interface with Congress.gov. """

    def __init__(
        self,
        api_key,
        api_version=API_VERSION,
        response_format=RESPONSE_FORMAT,
        raise_on_error=True,
        added_headers=None
    ):
        self.base_url = urljoin(ROOT_URL, api_version) + "/"
        self._session = requests.Session()

        # do not use url parameters, even if offered, use headers
        self._session.params = {"format": response_format}
        self._session.headers.update({"x-api-key": api_key})
        if added_headers:
            self._session.headers.update(added_headers)

        if raise_on_error:
            self._session.hooks = {
                "response": lambda r, *args, **kwargs: r.raise_for_status()
            }

    def __getattr__(self, method_name):
        """Find the session method dynamically and cache for later."""
        method = _MethodWrapper(self, method_name)
        self.__dict__[method_name] = method
        return method

    def retrieve_congress_bills(self, from_date: str, to_date: str, offset: int = 0):
        params = {
            "limit": RESULT_LIMIT,
            "fromDateTime": from_date,
            "toDateTime": to_date
        }
        
        if offset > 0:
            params["offset"] = offset

        response = self.get("bill", params=params)
        bills = response.get("bills", [])
        if "next" in response.get("pagination", {}):
            offset = self.extract_offset(response["pagination"]["next"])
            return (bills, response, offset, response["pagination"]["count"])
        return (bills, response, -1, 0)

    def extract_offset(self, url: str) -> int:
        parsed_url = urlparse(url)
        offset = parse_qs(parsed_url.query).get('offset', [0])[0]
        return int(offset)
    
    def get_laws(self, start_date: str, end_date: str, congress: int, offset: int = 0):
        params = {
            "fromDateTime": start_date,
            "toDateTime": end_date
        }
        if offset > 0:
            params["offset"] = offset
        response = self.get(f"law/{congress}", params=params)
        laws = response.get("laws", [])
        if "next" in response.get("pagination", {}):
            offset = self.extract_offset(response["pagination"]["next"])
            return (laws, response, offset, response["pagination"]["count"])
        return (laws, response, -1, 0)
    
    def get_law_details(self, congress: int, law_type: str, law_number: int):
        response = self.get(f"law/{congress}/{law_type}/{law_number}")
        return response
    
    def get_bill_data(self, bill_data: dict) -> dict:
        """
        Get additional data for a bill.

        Args:
            client (CDGClient): The client object.
            bill_data (dict): The bill data.

        Returns:
            dict: The additional data, with keys for each additional data type.
        """
        # Currently available endpoints for additional data on bills
        additional_bill_data = {
            'actions': 'actions',
            'amendments': 'amendments',
            'committees': 'committees',
            'cosponsors': 'cosponsors',
            'relatedBills': 'relatedbills',
            'subjects': 'subjects',
            'summaries': 'summaries',
            'textVersions': 'text',
            'titles': 'titles'
        }
        
        congress = bill_data["congress"]
        bill_type = bill_data["type"].lower()
        bill_number = bill_data["number"]
        
        for key, endpoint in additional_bill_data.items():
            data = self.get(f"bill/{congress}/{bill_type}/{bill_number}/{endpoint}")
            bill_data[key] = data[key]
        
        return bill_data

def determine_pagination_wait(start_time: float, offset: int):
    """
    Determine the wait time based on the rate limit constant.
    
    Args:
        start_time (float): The start time of the request.
        offset (int): The offset for the request.
        
    Returns:
        None
    """
    current_time = time.time()
    elapsed_time = current_time - start_time
    print(f"Elapsed time: {elapsed_time}")
    requests = max(RESULT_LIMIT, offset) / RESULT_LIMIT
    rate = elapsed_time / requests
    if rate < RATE_LIMIT_CONSTANT:
        wait_time = RATE_LIMIT_CONSTANT - rate
        print(f"Sleeping for {wait_time} seconds.")
        time.sleep(wait_time)


def determine_simple_wait(start_time: float, api_call_count: int):
    """
    Determine the wait time based on the rate limit constant.

    Args:
        start_time (float): The start time of the request.
        api_call_count (int): The number of API calls made.
    """
    current_time = time.time()
    elapsed_time = current_time - start_time
    rate = elapsed_time / api_call_count
    if rate < RATE_LIMIT_CONSTANT:
        wait_time = RATE_LIMIT_CONSTANT - rate
        print(f"Sleeping for {wait_time} seconds.")
        time.sleep(wait_time)

def gather_congress_bills(client: CDGClient, from_date: str, to_date: str):
    start = time.time()
    bills = []
    responses = []
    offset = 0
    total_count = None
    pbar = None

    while offset != -1:
        result, response, offset, count = client.retrieve_congress_bills(from_date, to_date, offset)
        bills.extend(result)
        responses.append(response)
        if total_count is None:
            total_count = count
            pbar = tqdm(total=total_count, desc="Retrieving bills")
        pbar.update(len(result))
        determine_pagination_wait(start, offset)  # Prevent rate limiting
    if pbar:
        pbar.close()
    return bills, responses



In [None]:
# Usage
client = CDGClient(api_key=os.environ["CONGRESS_API_KEY"], response_format=RESPONSE_FORMAT)
bills, responses = gather_congress_bills(client, "2024-02-01T00:00:00Z", "2024-03-30T00:00:00Z")

In [None]:
client = CDGClient(api_key=os.environ["CONGRESS_API_KEY"], response_format=RESPONSE_FORMAT)
bill_data = []
api_hit_count = 0
start = time.time()
for i, bill in enumerate(bills[:1000]):
    # Progress
    if i % 10 == 0:
        print(f"{i}/1000")
    bill_data.append(client.get_bill_data(bill))
    api_hit_count += 10
    determine_simple_wait(start, api_hit_count)  # Prevent rate limiting

0/1000
Sleeping for 1.025996102227105 seconds.
Sleeping for 0.9961101611455281 seconds.
Sleeping for 0.9913580329329879 seconds.
Sleeping for 0.9873335493935478 seconds.
Sleeping for 0.9681432829962836 seconds.
Sleeping for 0.920016054753904 seconds.
Sleeping for 0.9291596942477756 seconds.
Sleeping for 0.936571803357866 seconds.
Sleeping for 0.9339035675849443 seconds.
Sleeping for 0.9294780095418294 seconds.
10/1000
Sleeping for 0.9294344343320287 seconds.
Sleeping for 0.9276267687479655 seconds.
Sleeping for 0.9281252204862415 seconds.
Sleeping for 0.9296505791800362 seconds.
Sleeping for 0.9295739032604076 seconds.
Sleeping for 0.9303441378805373 seconds.
Sleeping for 0.9311669745476416 seconds.
Sleeping for 0.9315794132373951 seconds.
Sleeping for 0.9317746078758908 seconds.
Sleeping for 0.9323698944515652 seconds.
20/1000
Sleeping for 0.9332338350790518 seconds.
Sleeping for 0.9322789594380543 seconds.
Sleeping for 0.9328347876452017 seconds.
Sleeping for 0.9333377778530121 secon

HTTPError: 429 Client Error: Too Many Requests for url: https://api.congress.gov/v3/bill/113/hr/154/text?format=json

In [73]:
# Save bills to file 
import json
from datetime import datetime

def save_bills(bills: list[dict], filename: str):
    with open(filename, "w") as f:
        json.dump(bills, f)

def load_bills(filename: str) -> list[dict]:
    with open(filename, "r") as f:
        return json.load(f)
    
save_bills(bills, f"bills_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.json")


In [90]:
def save_additional_data(data: list, filename: str):
    with open(filename, "w") as f:
        json.dump(data, f)

def load_additional_data(filename: str) -> list:
    with open(filename) as f:
        data = json.load(f)
    return data

save_additional_data(bill_data, "additional_data.json")

In [None]:
extra_data = []

def calculate_simple_wait(start_time: float, api_hit_count: int) -> float:
    current_time = time.time()
    elapsed_time = current_time - start_time
    rate = elapsed_time / api_hit_count
    if rate < RATE_LIMIT_CONSTANT:
        wait_time = RATE_LIMIT_CONSTANT - rate
        print(f"Sleeping for {wait_time} seconds.")
        time.sleep(wait_time)

api_hit_count = 0
for bill in bills:
    start = time.time()
    api_hit_count += 1
    bill_data = get_bill_data(client, bill)
    extra_data.append(bill_data)

{'congress': 118,
 'latestAction': {'actionDate': '2023-10-18',
  'text': 'Referred to the Committee on the Judiciary. (text: CR S5090-5091)'},
 'number': '413',
 'originChamber': 'Senate',
 'originChamberCode': 'S',
 'title': 'A resolution condemning foreign nationals in the United States who have endorsed and espoused the actions of foreign terrorist organizations (FTO) in Gaza who, on October 7, 2023, launched attacks against the State of Israel, and killed innocent Israeli and United States citizens.',
 'type': 'SRES',
 'updateDate': '2024-03-14',
 'updateDateIncludingText': '2024-03-14',
 'url': 'https://api.congress.gov/v3/bill/118/sres/413?format=json',
 'actions': [{'actionDate': '2023-10-18',
   'committees': [{'name': 'Judiciary Committee',
     'systemCode': 'ssju00',
     'url': 'https://api.congress.gov/v3/committee/senate/ssju00?format=json'}],
   'sourceSystem': {'name': 'Senate'},
   'text': 'Referred to the Committee on the Judiciary. (text: CR S5090-5091)',
   'type':