In [5]:
# Data Processing
import pandas as pd
import os
import openai
import csv
import json
from tqdm import tqdm


# Credentials come from the environment so that no secret is stored in the
# notebook; either value is None when its variable is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")
openai.organization = os.environ.get("OPENAI_ORGANIZATION")

In [6]:
# Load the 2020 election email corpus; the CSV path is relative to the
# notebook's working directory.
election_emails_df = pd.read_csv(
    "../data/2020_Election_Emails_with_common_sense.csv"
)

In [7]:
# Preview the first five rows (the default for .head()) as a sanity check.
election_emails_df.head()

Unnamed: 0,from_name,from_address,subject,body_text,name,office_sought,party_affiliation,office_level,district_type,final_website,...,type,subtype,date,hour,day,uid_email,uid_inbox,incumbent,common_sense,self_evident
0,When Democrats Turn Out,info@whendemocratsturnout.com,Warren decries McConnell's heartlessness,"When Democrats Turn Out Alex, Has Mitch McConn...",Adair Ford Boroughs,U.S. House South Carolina District 2,Democratic Party,Federal,Congress,www.adairforcongress.com,...,,,2020-05-16,10,Sat,ab9765462c9a1429935b1b32da5d5066,f190775fa48aa60c7c54432da69a1f1f,No,True,False
1,When Democrats Turn Out,info@whendemocratsturnout.com,Blocking a GOP win in the closest House race i...,"When Democrats Turn Out Alex, In a dangerous p...",Adair Ford Boroughs,U.S. House South Carolina District 2,Democratic Party,Federal,Congress,www.adairforcongress.com,...,,,2020-10-21,12,Wed,848ad10e8460dee62a793d532bf8be4b,f190775fa48aa60c7c54432da69a1f1f,No,True,False
2,Jay Inslee,hello@jayinslee.com,Too many tragic anniversaries,We've done a lot here -- and we can do more to...,Jay Inslee,Governor of Washington,Democratic Party,State,State,www.jayinslee.com,...,,,2020-02-14,16,Fri,072b808bf9d8a7a7a4c4be0fe655f5f2,39439d634852541033a9c51ffe949ae0,Yes,True,False
3,Susan Wagle,team@teamwagle.com,Tough Times,Susan Wagle for Senate Alex -- We are in tough...,Susan Wagle,U.S. Senate Kansas,Republican Party,Federal,State,http://www.teamwagle.com/,...,,,2020-03-27,10,Fri,838e39a17cadf821f9d047faecf517be,6502e30a98d8576247eb9b8eb2950521,No,True,False
4,Susan Wagle,team@teamwagle.com,Coronavirus Update,Susan Wagle for Senate Alex -- Kansas now has ...,Susan Wagle,U.S. Senate Kansas,Republican Party,Federal,State,http://www.teamwagle.com/,...,,,2020-03-13,10,Fri,468d1c05618c033c4a02cd4fae37f04d,6502e30a98d8576247eb9b8eb2950521,No,True,False


In [8]:
def extract_commonsense(email, model="gpt-4-1106-preview"):
    """Ask the chat model to extract common-sense statements from one email.

    Args:
        email: Text of the email; it is sent unchanged as the user message.
        model: Chat model identifier forwarded to the API. Defaults to the
            model this notebook was originally run with.

    Returns:
        The response object from ``openai.ChatCompletion.create``, unmodified.
        The system prompt asks for a bare JSON list of statements, but that
        format is not validated here.
    """
    # Plain string literals: nothing is interpolated, so no f-prefix is needed.
    system_prompt = (
        "You are a bot tasked with extracting population level common sense statements from emails. "
        "The statements should be considered common sense by people. "
        "If the statements you find, are not considered common sense by a portion of the population, do not include them in your answer. "
        "You return the statements you find in a JSON list with only the statements. "
        "Do not include the tags for JSON formatting in your output."
    )

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": email},
    ]

    # Sampling settings are pinned (temperature 0, no penalties) so every
    # email is processed with the same configuration.
    return openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=0,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )

In [9]:
# Peek at the raw text of the first five email bodies.
election_emails_df["body_text"].values[:5]

array(['When Democrats Turn Out Alex, Has Mitch McConnell been living under a rock? The House Majority Leader has refused to support more emergency relief for Americans, saying he hasn’t “felt the urgency” to take action. Luckily, America has a progressive champion who’ll help him feel it. A May 11 tweet from @ewarren reads, "More than 80,000 people are dead, Senator. Is there a number that would make this more urgent for you?" Warren\'s tweet is in response to an article tweeted from @thehill, also on May 11, with the headline: "McConnell said there isn\'t need yet for fifth coronavirus relief bill: \'I don\'t think we have felt the urgency of acting immediately\'" [[URL REDACTED]] Source: Twitter Warren’s spirited defense of common sense and common decency drew one Twitter user to declare, “We need Elizabeth Warren as vice president. Now more than ever. #WarrenforVP.” While the decision over who the VP candidate will be is still being made, we want to know what you think now: Do you 

In [10]:
# One API call per email body, in DataFrame order. The raw responses are
# kept so later cells can read both the reply content and the token usage.
answers = []
email_bodies = election_emails_df["body_text"].values

for body in tqdm(email_bodies):
    answers.append(extract_commonsense(body))

100%|██████████| 3912/3912 [1:27:29<00:00,  1.34s/it]  


In [11]:
# Rates used for the cost estimate, in USD per 1,000 tokens.
# NOTE(review): these are the values the analysis was originally run with;
# confirm them against current pricing for the model before reusing.
PROMPT_USD_PER_1K_TOKENS = 0.01
COMPLETION_USD_PER_1K_TOKENS = 0.03

METADATA_CSV_PATH = "../extracted_statements/election_emails/extracted_meta_data.csv"

columns = [
    "email",
    "statements",
    "prompt_tokens",
    "completion_tokens",
    "prompt_cost",
    "completion_cost",
    "total_cost",
]

# Look the column up once instead of on every iteration.
email_bodies = election_emails_df.body_text.values

# Build one row per response: the email, its extracted statements and the
# token usage / estimated cost of the call that produced them.
data = []
for i, a in enumerate(answers):
    email_text = email_bodies[i]
    extracted_content = json.loads(a.choices[0].message.content)

    # Calculate cost
    prompt_tokens = a.usage.prompt_tokens
    completion_tokens = a.usage.completion_tokens
    cost_of_prompt = (prompt_tokens / 1000) * PROMPT_USD_PER_1K_TOKENS
    cost_of_completion = (completion_tokens / 1000) * COMPLETION_USD_PER_1K_TOKENS
    total_cost = cost_of_prompt + cost_of_completion

    data.append(
        [
            email_text,
            # Re-serialise so the CSV cell holds normalised JSON text.
            json.dumps(extracted_content),
            prompt_tokens,
            completion_tokens,
            cost_of_prompt,
            cost_of_completion,
            total_cost,
        ]
    )


df = pd.DataFrame(data, columns=columns)

# Make sure the output directory exists so the write does not fail on a
# fresh checkout.
os.makedirs(os.path.dirname(METADATA_CSV_PATH), exist_ok=True)
df.to_csv(METADATA_CSV_PATH, index=False)

In [12]:
STATEMENTS_CSV_PATH = "../extracted_statements/election_emails/extracted_statements.csv"

# Flatten the statements from every response into a single list.
extracted_statements = []
for a in answers:
    # Parse each reply once; extending with an empty list is a no-op, so no
    # separate emptiness check is needed.
    extracted_statements.extend(json.loads(a.choices[0].message.content))

# Make sure the output directory exists so the write does not fail on a
# fresh checkout.
os.makedirs(os.path.dirname(STATEMENTS_CSV_PATH), exist_ok=True)

with open(STATEMENTS_CSV_PATH, "w", newline="", encoding="utf-8") as file:
    writer = csv.writer(file)
    writer.writerow(["statements"])  # Header for the column
    writer.writerows([statement] for statement in extracted_statements)

In [13]:

ANSWERS_MD_PATH = "../extracted_statements/election_emails/extracted_answers.md"

# Look the column up once instead of on every iteration.
email_bodies = election_emails_df.body_text.values

# Collect fragments and join once at the end rather than growing a string
# with repeated concatenation.
markdown_parts = ["## Extracted Statements:\n\n"]
for i, a in enumerate(answers):
    # Parse each reply once and reuse the result.
    extracted_content = json.loads(a.choices[0].message.content)
    if len(extracted_content) == 0:
        # Emails with no extracted statements are left out of the report.
        continue

    markdown_parts.append(f"### Email {i + 1}\n")
    markdown_parts.append(f"{email_bodies[i]}\n\n")
    markdown_parts.append("Extracted statements:\n")
    markdown_parts.extend(f"- **{statement}**\n" for statement in extracted_content)
    markdown_parts.append("\n")

markdown_output = "".join(markdown_parts)

# Make sure the output directory exists so the write does not fail on a
# fresh checkout.
os.makedirs(os.path.dirname(ANSWERS_MD_PATH), exist_ok=True)

# Write as UTF-8 explicitly: the email text contains non-ASCII characters
# (e.g. curly quotes), and the platform default encoding may not be able to
# encode them. This also matches the encoding used for the CSV export.
with open(ANSWERS_MD_PATH, "w", encoding="utf-8") as file:
    file.write(markdown_output)