In [53]:
from openai import OpenAI
import os
import pandas as pd
import sqlite3
from dotenv import load_dotenv
from tqdm.notebook import tqdm
from groq import Groq


In [60]:
# Runs before any os.getenv() call below; presumably this loads the API keys
# from a local .env file into the environment.
load_dotenv()

# --- Data source -----------------------------------------------------------
db_path = "data/bill_data.db"

# --- Model settings --------------------------------------------------------
openai_model = "gpt-3.5-turbo-0125"
groq_model = "mixtral-8x7b-32768"
max_length = 256  # sent as max_tokens on OpenAI requests (see query_openai)

# --- API clients -----------------------------------------------------------
# os.getenv returns None when a variable is unset, so a missing key is passed
# to the client as api_key=None.
openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))

# --- System prompt ---------------------------------------------------------
# The whole file is used verbatim as the system message for every request.
with open('summarize_prompt.txt', 'r') as file:
    prompt_instructions = file.read()

In [59]:
def create_prompt(instructions, query):
    """Build a two-message chat payload.

    Args:
        instructions: Text placed in the "system" message.
        query: Text placed in the "user" message.

    Returns:
        A list of two dicts, system message first and user message second,
        each with "role" and "content" keys.
    """
    system_message = {"role": "system", "content": instructions}
    user_message = {"role": "user", "content": query}
    return [system_message, user_message]

def query_openai(client, instructions, query):
    """Send one system+user exchange through `client` and return the reply text.

    Args:
        client: Object exposing `chat.completions.create(...)`.
        instructions: System-message text.
        query: User-message text.

    Returns:
        The `message.content` of the first choice in the response.
    """
    messages = create_prompt(instructions, query)

    # Model name and token cap come from the notebook-level settings.
    request = {
        "model": openai_model,
        "messages": messages,
        "max_tokens": max_length,
    }
    completion = client.chat.completions.create(**request)

    first_choice = completion.choices[0]
    return first_choice.message.content

def query_groq(client, instructions, query):
    """Send one system+user exchange through `client` and return the reply text.

    Unlike `query_openai`, no `max_tokens` value is passed, so the reply
    length is left to the service.

    Args:
        client: Object exposing `chat.completions.create(...)`.
        instructions: System-message text.
        query: User-message text.

    Returns:
        The `message.content` of the first choice in the response.
    """
    messages = create_prompt(instructions, query)

    # Model name comes from the notebook-level `groq_model` setting.
    request = {
        "model": groq_model,
        "messages": messages,
    }
    completion = client.chat.completions.create(**request)

    first_choice = completion.choices[0]
    return first_choice.message.content

In [27]:
def get_bill_data(limit=None, database=None):
    """Load bills joined with their text and keep one row per bill.

    Bills are LEFT JOINed to `bill_text` on (bill_congress, bill_number), so a
    bill with several text rows appears several times and a bill without text
    appears once with a null `readable_text`. A `bill_version_suffix` column
    is derived from the first word that directly precedes a colon in
    `readable_text`, and for every (bill_congress, bill_type, bill_number)
    only the row with the greatest suffix (plain string comparison) is kept.

    NOTE(review): string order of the suffix codes is used as a stand-in for
    "latest version"; confirm that it matches the intended chronology.

    Args:
        limit: Optional positive row cap. It is applied to the joined SQL
            rows *before* de-duplication, so fewer than `limit` bills may be
            returned. `None` or a non-positive value means no cap.
        database: Optional path of the SQLite file to read. Defaults to the
            notebook-level `db_path`.

    Returns:
        A DataFrame with the selected columns plus `bill_version_suffix`, one
        row per bill, ordered by the bill key and keeping the original row
        labels. Bills with no extractable suffix are kept with a missing
        suffix instead of raising.
    """
    key_columns = ['bill_congress', 'bill_type', 'bill_number']

    query = '''
        SELECT
            b.bill_congress,
            b.bill_type,
            b.bill_number,
            b.bill_version,
            b.policy_area,
            b.subjects,
            bt.readable_text
        FROM bills b
        LEFT JOIN bill_text bt ON b.bill_congress = bt.bill_congress
                               AND b.bill_number = bt.bill_number
    '''

    # Bind the limit as a parameter rather than formatting it into the SQL.
    params = None
    if limit is not None and limit > 0:
        query += '\nLIMIT ?'
        params = (int(limit),)

    conn = sqlite3.connect(db_path if database is None else database)
    try:
        df = pd.read_sql_query(query, conn, params=params)
    finally:
        # Release the connection even when the query fails.
        conn.close()

    # Extract bill version suffixes from the readable_text column
    df['bill_version_suffix'] = df['readable_text'].str.extract(r'(\b\w+\b)(?=:)', expand=False)

    # Rank rows so that, inside each bill, the greatest suffix comes first and
    # missing suffixes come last; then keep the first row per bill. This avoids
    # idxmax on a string column (unsupported on some pandas versions) and the
    # lookup failure it causes when a bill has no suffix at all.
    ranked = df.sort_values(
        by=key_columns + ['bill_version_suffix'],
        ascending=[True, True, True, False],
        na_position='last',
    )
    return ranked.drop_duplicates(subset=key_columns, keep='first')

# Put the two display settings back to their pandas defaults before previewing.
for display_option in ('display.max_colwidth', 'display.width'):
    pd.reset_option(display_option)

# Small sample for development; the limit is passed straight to get_bill_data.
bill_df = get_bill_data(limit=10)
print(bill_df.head())

   bill_congress bill_type bill_number bill_version            policy_area  \
1            113   HCONRES    HCONRES1        3.0.0               Congress   
2            113   HCONRES   HCONRES10        3.0.0              Education   
5            113   HCONRES  HCONRES100        3.0.0               Congress   
7            113   HCONRES  HCONRES101        3.0.0  International Affairs   
8            113   HCONRES  HCONRES102        3.0.0                 Health   

                                            subjects  \
1          Congressional operations and organization   
2  Assault and harassment offenses,Commemorative ...   
5  Commemorative events and holidays,Human rights...   
7  Asia,Congressional oversight,Detention of pers...   
8  Child health,Child safety and welfare,Commemor...   

                                       readable_text bill_version_suffix  
1  113 HCON 1 EH: Regarding consent to assemble o...                  EH  
2  113 HCON 10 IH: Supporting the goals and 

In [61]:
# Register tqdm's progress_apply on pandas objects, then summarise every bill
# text with one query_groq call per row and store the replies in a new column.
tqdm.pandas(desc="Generating summaries...")
bill_df['summary'] = bill_df['readable_text'].progress_apply(
    lambda bill_text: query_groq(groq_client, prompt_instructions, bill_text)
)
print(bill_df.head())

Generating summaries...:   0%|          | 0/6 [00:00<?, ?it/s]

   bill_congress bill_type bill_number bill_version            policy_area  \
1            113   HCONRES    HCONRES1        3.0.0               Congress   
2            113   HCONRES   HCONRES10        3.0.0              Education   
5            113   HCONRES  HCONRES100        3.0.0               Congress   
7            113   HCONRES  HCONRES101        3.0.0  International Affairs   
8            113   HCONRES  HCONRES102        3.0.0                 Health   

                                            subjects  \
1          Congressional operations and organization   
2  Assault and harassment offenses,Commemorative ...   
5  Commemorative events and holidays,Human rights...   
7  Asia,Congressional oversight,Detention of pers...   
8  Child health,Child safety and welfare,Commemor...   

                                       readable_text bill_version_suffix  \
1  113 HCON 1 EH: Regarding consent to assemble o...                  EH   
2  113 HCON 10 IH: Supporting the goals an

In [62]:
# Print the summaries without column truncation. option_context restores the
# previous display.max_colwidth on exit (even if printing raises), instead of
# resetting it to the pandas default.
with pd.option_context('display.max_colwidth', None):
    print(bill_df['summary'])

1    Brief overview of the main topic and purpose:\n\nH. Con. Res. 1, 113th Congress, 1st Session, is a concurrent resolution that outlines the procedures for assembling Members of the House of Representatives and the Senate at a location outside the District of Columbia, should the public interest necessitate it. The resolution invokes clause 4, section 5, article I of the Constitution, which grants the Speaker of the House and the Majority Leader of the Senate (or their designees) the authority to call for such a gathering, following consultation with the respective Minority Leaders.\n\nIn-depth analysis of ideological elements:\n\n1. Legislative intent: The primary purpose of this resolution is to establish a formal process for assembling Congress outside of Washington D.C., should the need arise. The resolution does not explicitly reveal any particular ideological leanings, as it focuses on procedural matters rather than policy issues. However, the underlying ideology may be inferr