In [9]:
import pandas as pd
from llama_index import VectorStoreIndex, StorageContext
import llama_index
from llama_index.node_parser import SimpleNodeParser
from llama_index.vector_stores import PGVectorStore
import os

from dotenv import load_dotenv, find_dotenv
# Read the local .env file. The rest of the notebook assumes it defines:
#   - OPENAI_API_KEY
#   - BILL_DB_CONN_STRING (connection string for the postgres vector database)
# The return value is bound to `_`, presumably so the cell does not display it.
_ = load_dotenv(find_dotenv())

In [None]:
import psycopg2

# Connection string for the postgres vector database, read from the environment.
connection_string = os.getenv("BILL_DB_CONN_STRING")
if not connection_string:
    # os.getenv returns None when the variable is missing; stop here with a
    # clear message instead of letting the database driver fail on None.
    raise RuntimeError(
        "BILL_DB_CONN_STRING is not set; define it in the environment or the local .env file"
    )

# Database name handed to the vector stores created in later cells.
# NOTE(review): this is hard-coded rather than taken from the connection
# string — confirm both refer to the same database.
db_name = "postgres"

# Open a connection and let each statement commit on its own.
conn = psycopg2.connect(connection_string)
conn.autocommit = True

In [None]:
# CHIPS act: download the bill text from congress.gov (H.R. 4346 per the URL)
# and keep the loaded result in `documents` for the indexing cell.
bs_reader = llama_index.BeautifulSoupWebReader()
documents = bs_reader.load_data(
    urls=[
        'https://www.congress.gov/117/bills/hr4346/BILLS-117hr4346enr.xml',
    ]
)

In [None]:
from sqlalchemy import make_url

# Break the connection string into its parts so they can be passed to the
# vector store individually.
url = make_url(connection_string)

# Vector store that uses the "chips_act" table.
# NOTE(review): the database name comes from db_name, not from the connection
# string — confirm both refer to the same database.
vector_store = PGVectorStore.from_params(
    host=url.host,
    port=url.port,
    user=url.username,
    password=url.password,
    database=db_name,
    table_name="chips_act",
    embed_dim=1536,  # openai embedding dimension
)

# Index the currently loaded `documents` through the vector store above.
# NOTE(review): presumably re-running this cell writes the embeddings to the
# table again — confirm before re-executing.
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    show_progress=True,
)
query_engine = index.as_query_engine()

In [None]:
# Inflation Reduction Act: download the bill text from congress.gov
# (H.R. 5376 per the URL). This replaces whatever `documents` held before.
bs_reader = llama_index.BeautifulSoupWebReader()
documents = bs_reader.load_data(
    urls=[
        'https://www.congress.gov/117/bills/hr5376/BILLS-117hr5376enr.xml',
    ]
)

In [None]:
from sqlalchemy import make_url

# Break the connection string into its parts so they can be passed to the
# vector store individually.
url = make_url(connection_string)

# Vector store that uses the "inflation_reduction_act" table.
# NOTE(review): the database name comes from db_name, not from the connection
# string — confirm both refer to the same database.
vector_store = PGVectorStore.from_params(
    host=url.host,
    port=url.port,
    user=url.username,
    password=url.password,
    database=db_name,
    table_name="inflation_reduction_act",
    embed_dim=1536,  # openai embedding dimension
)

# Index the currently loaded `documents` through the vector store above.
# NOTE(review): presumably re-running this cell writes the embeddings to the
# table again — confirm before re-executing.
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    show_progress=True,
)
query_engine = index.as_query_engine()

In [10]:
# Fiscal Responsibility act of 2023: download the bill text from congress.gov
# (H.R. 3746 per the URL). This replaces whatever `documents` held before.
bs_reader = llama_index.BeautifulSoupWebReader()
documents = bs_reader.load_data(
    urls=[
        'https://www.congress.gov/118/bills/hr3746/BILLS-118hr3746enr.xml',
    ]
)

In [12]:
from sqlalchemy import make_url

# Break the connection string into its parts so they can be passed to the
# vector store individually.
url = make_url(connection_string)

# Vector store that uses the "fiscal_responsibility_act" table.
# NOTE(review): the database name comes from db_name, not from the connection
# string — confirm both refer to the same database.
vector_store = PGVectorStore.from_params(
    host=url.host,
    port=url.port,
    user=url.username,
    password=url.password,
    database=db_name,
    table_name="fiscal_responsibility_act",
    embed_dim=1536,  # openai embedding dimension
)

# Index the currently loaded `documents` through the vector store above.
# NOTE(review): presumably re-running this cell writes the embeddings to the
# table again — confirm before re-executing.
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    show_progress=True,
)
query_engine = index.as_query_engine()

Parsing documents into nodes: 100%|██████████| 1/1 [00:00<00:00, 13.70it/s]
Generating embeddings: 100%|██████████| 25/25 [00:01<00:00, 19.68it/s]
