In [71]:
import pandas as pd
import pytesseract
from PIL import Image
import re
from IPython.display import display
import requests
import json
from requests.adapters import HTTPAdapter, Retry
from dotenv import load_dotenv
from openai import OpenAI 
import os

In [72]:
# Load variables from a .env file (if one is found) into the process
# environment; OPENAI_API_KEY is read from the environment further down.
load_dotenv()

True

In [73]:
# Chat-completions model name shared by the request helpers.
MODEL = "gpt-4o"

# API client; the key comes from the environment rather than being hard-coded.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [51]:

# Path to the pipe-delimited candidate file; it is read into a DataFrame in a later cell
file_path = 'feccandidates2024.txt'

In [84]:
# Column names, in file order, for the header-less pipe-delimited input.
columns = [
    "CAND_ID", "CAND_NAME", "CAND_ICI",
    "PTY_CD", "CAND_PTY_AFFILIATION", "TTL_RECEIPTS",
    "TRANS_FROM_AUTH", "TTL_DISB", "TRANS_TO_AUTH",
    "COH_BOP", "COH_COP", "CAND_CONTRIB",
    "CAND_LOANS", "OTHER_LOANS", "CAND_LOAN_REPAY",
    "OTHER_LOAN_REPAY", "DEBTS_OWED_BY", "TTL_INDIV_CONTRIB",
    "CAND_OFFICE_ST", "CAND_OFFICE_DISTRICT", "SPEC_ELECTION",
    "PRIM_ELECTION", "RUN_ELECTION", "GEN_ELECTION",
    "GEN_ELECTION_PRECENT", "OTHER_POL_CMTE_CONTRIB", "POL_PTY_CONTRIB",
    "CVG_END_DT", "INDIV_REFUNDS", "CMTE_REFUNDS",
]

# Subset of `columns` that is converted to numeric dtypes after loading.
numeric_columns = [
    "TTL_RECEIPTS", "TRANS_FROM_AUTH", "TTL_DISB",
    "TRANS_TO_AUTH", "COH_BOP", "COH_COP",
    "CAND_CONTRIB", "CAND_LOANS", "OTHER_LOANS",
    "CAND_LOAN_REPAY", "OTHER_LOAN_REPAY", "DEBTS_OWED_BY",
    "TTL_INDIV_CONTRIB", "GEN_ELECTION_PRECENT", "OTHER_POL_CMTE_CONTRIB",
    "POL_PTY_CONTRIB", "INDIV_REFUNDS", "CMTE_REFUNDS",
]

# The file carries no header row, so the names above are applied positionally.
df = pd.read_csv(file_path, sep='|', header=None, names=columns)

# Coerce every numeric column; values that cannot be parsed become NaN.
df = df.assign(**{
    name: pd.to_numeric(df[name], errors='coerce')
    for name in numeric_columns
})

# Parse the coverage end date (MM/DD/YYYY); values that cannot be parsed become NaT.
df['CVG_END_DT'] = pd.to_datetime(df['CVG_END_DT'], format='%m/%d/%Y', errors='coerce')

# Show the first few rows as the cell output.
df.head()

Unnamed: 0,CAND_ID,CAND_NAME,CAND_ICI,PTY_CD,CAND_PTY_AFFILIATION,TTL_RECEIPTS,TRANS_FROM_AUTH,TTL_DISB,TRANS_TO_AUTH,COH_BOP,...,SPEC_ELECTION,PRIM_ELECTION,RUN_ELECTION,GEN_ELECTION,GEN_ELECTION_PRECENT,OTHER_POL_CMTE_CONTRIB,POL_PTY_CONTRIB,CVG_END_DT,INDIV_REFUNDS,CMTE_REFUNDS
0,H2AK00200,"CONSTANT, CHRISTOPHER",C,1,DEM,0.0,0.0,0.0,0.0,0.0,...,,,,,,0.0,0.0,2024-03-31,0.0,0.0
1,H2AK01158,"PELTOLA, MARY",I,1,DEM,4618455.38,82500.0,2734884.52,0.0,691260.3,...,,,,,,870888.2,2473.34,2024-03-31,34628.09,2500.0
2,H2AK00226,"PALIN, SARAH",O,2,REP,3866.49,0.0,50246.07,0.0,46379.58,...,,,,,,0.0,0.0,2023-03-31,-9152.5,0.0
3,H2AK01083,"BEGICH, NICHOLAS III",C,2,REP,608018.29,0.0,471138.27,0.0,41233.99,...,,,,,,1000.0,0.0,2024-03-31,650.0,0.0
4,H4AK00156,"DAHLSTROM, NANCY",C,2,REP,466431.55,161433.44,70250.11,0.0,0.0,...,,,,,,199516.02,0.0,2024-03-31,0.0,0.0


In [134]:
def extract_candidate_names(query):
    """Return every "Last, First" style name found in ``query``.

    Matching works on mixed-case names ("Smith, John") as well as the
    all-caps form used in the ``CAND_NAME`` column ("ALSOBROOKS, ANGELA").
    Apostrophes and hyphens inside a name part are accepted.

    Only the first word after the comma is captured, so a trailing middle
    name or suffix ("BEGICH, NICHOLAS III") is not part of the match.

    Args:
        query: Free-text string to scan.

    Returns:
        list[str]: The matched names in order of appearance, exactly as they
        are written in ``query``; an empty list when nothing matches.
    """
    # A name part is a capital letter followed by one or more letters of
    # either case, apostrophes or hyphens. Requiring lowercase here (as the
    # previous pattern did) rejects every upper-case dataset name.
    name_part = r"[A-Z][A-Za-z'\-]+"
    return re.findall(name_part + ", " + name_part, query)

In [146]:
def get_candidate_data(candidate_names, df):
    """Build a prompt holding the data rows of each requested candidate.

    Args:
        candidate_names: Iterable of names; each is compared for exact
            equality against the ``CAND_NAME`` column.
        df: DataFrame containing a ``CAND_NAME`` column.

    Returns:
        str: ``"Based on the following data:"`` followed by one section per
        name, separated by blank lines. A section is either the name and
        its matching rows rendered with ``DataFrame.to_string(index=False)``
        or ``"No data found for <name>"``. When ``candidate_names`` is
        empty, the fixed message
        ``"No valid candidate data found to compare."`` is returned.
    """
    sections = []
    for name in candidate_names:
        # Exact, case-sensitive match on the candidate name.
        rows = df[df['CAND_NAME'] == name]
        if rows.empty:
            sections.append(f"No data found for {name}")
        else:
            sections.append(f"{name}:\n{rows.to_string(index=False)}")

    if not sections:
        return "No valid candidate data found to compare."

    # Join outside the f-string: a backslash inside an f-string expression
    # is a SyntaxError on Python versions before 3.12.
    joined_sections = "\n\n".join(sections)
    return f"Based on the following data:\n\n{joined_sections}"

In [136]:
# System prompt for single-candidate questions: instructs the model to reply
# with a JSON object holding "candidate_name" and "query_result".
candidate_system_prompt = '''
Your goal is to extract key information from candidate data based on a provided query. You will be given a candidate's detailed data, and you will output a json object containing the requested information.

The json object should have the following structure:
{
    "candidate_name": string, // The name of the candidate
    "query_result": string // The result of the query based on the candidate's data
}

Please ensure the information is accurate and concise.
'''


In [152]:
# System prompt for multi-candidate comparisons: asks for a narrative summary
# plus a comparison and a bulleted pros/cons list instead of JSON.
candidate_system_prompt2 = '''
Your goal is to extract key information from candidate data based on a provided query. You will be given a candidate's detailed data, and you will output a summary narrative containing the requested information.
Include a comparison between them and a bulleted list of Pros and Cons between them
Please ensure the information is accurate and concise.
'''

In [155]:
def query_gpt(prompt):
    """Send ``prompt`` to the chat model and return the text of its reply.

    The request uses ``candidate_system_prompt2`` as the system message and
    ``prompt`` as the single user message.

    Args:
        prompt: User message content.

    Returns:
        The ``content`` of the first choice in the completion response.
    """
    conversation = [
        {"role": "system", "content": candidate_system_prompt2},
        {"role": "user", "content": prompt},
    ]
    completion = client.chat.completions.create(
        model=MODEL,
        messages=conversation,
        max_tokens=2000,
        temperature=0.5,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [139]:
def get_candidate_info(query, candidate_name, df):
    """Answer ``query`` about one candidate using that candidate's data rows.

    Args:
        query: Question to ask about the candidate.
        candidate_name: Name compared for exact equality against the
            ``CAND_NAME`` column.
        df: DataFrame containing a ``CAND_NAME`` column.

    Returns:
        str: The model's reply text. When no row matches
        ``candidate_name``, no request is made and a JSON string with
        ``"candidate_name"`` and ``"query_result"`` keys (the latter reading
        ``"No data found for <name>"``) is returned instead.
    """
    candidate_df = df[df['CAND_NAME'] == candidate_name]
    if candidate_df.empty:
        # Without a matching row the prompt would only contain pandas'
        # "Empty DataFrame" text, so skip the API call and report the miss
        # in the same two-key structure the system prompt asks for.
        return json.dumps({
            "candidate_name": candidate_name,
            "query_result": f"No data found for {candidate_name}",
        })

    data_str = candidate_df.to_string(index=False)
    prompt = f"Based on the following data:\n\n{data_str}\n\n{query}"

    response = client.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": candidate_system_prompt},
            {"role": "user", "content": prompt},
        ],
        max_tokens=150,
        # Low temperature: the reply is a lookup over the supplied rows.
        temperature=0.1,
    )

    return response.choices[0].message.content

In [141]:
# Ask one question about a single candidate; the name must equal a CAND_NAME value exactly.
candidate_name = "ALSOBROOKS, ANGELA"
# NOTE(review): the query text reads "the he" — likely a typo; left unchanged because it is sent to the model as written.
query = "How much money does the he have?"
response = get_candidate_info(query, candidate_name, df)
print(response)

```json
{
    "candidate_name": "ALSOBROOKS, ANGELA",
    "query_result": "1909103.52"
}
```


In [156]:
# Candidates to compare; each name is matched exactly against the CAND_NAME column.
candidate_names = ["ALSOBROOKS, ANGELA", "HOGAN, LARRY"]  # Directly specify candidate names
# Assemble the data-only prompt (one section per candidate) for the comparison request.
prompt = get_candidate_data(candidate_names, df)
# print(prompt)  # uncomment to inspect the assembled prompt

In [157]:
# Send the assembled comparison prompt to the model and print the reply text.
response = query_gpt(prompt)
print(response)

### Candidate Comparison: Angela Alsobrooks vs. Larry Hogan

#### Angela Alsobrooks
- **Party Affiliation:** Democrat
- **Total Receipts:** $7,784,331.53
- **Transfers from Authorized Committees:** $1,181,935.29
- **Total Disbursements:** $5,875,228.01
- **Cash on Hand at Beginning of Period:** $0.00
- **Cash on Hand at Close of Period:** $1,909,103.52
- **Total Individual Contributions:** $6,352,939.12
- **Other Political Committee Contributions:** $243,155.08
- **Political Party Contributions:** $0.00
- **Coverage End Date:** 2024-04-24
- **Individual Refunds:** $144,281.45
- **Committee Refunds:** $9,400.00

#### Larry Hogan
- **Party Affiliation:** Republican
- **Total Receipts:** $3,074,711.83
- **Transfers from Authorized Committees:** $1,715,601.79
- **Total Disbursements:** $1,289,904.75
- **Cash on Hand at Beginning of Period:** $0.00
- **Cash on Hand at Close of Period:** $1,784,807.08
- **Total Individual Contributions:** $1,067,510.04
- **Other Political Committee Contribut