In [None]:
import pandas as pd
from openai import OpenAI
import os
from tqdm.auto import tqdm
tqdm.pandas(desc="Processing statements")

In [None]:
# Read the API key from the environment instead of hardcoding it in the notebook:
# a key written into a cell is saved with the file and leaks as soon as the
# notebook is shared or committed. Export OPENAI_API_KEY before starting Jupyter.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it in the environment that launches the kernel."
    )

# Initialize the OpenAI client using the environment variable
client = OpenAI(api_key=api_key)

In [None]:
# Load the PolitiFact fact-check dataset; the path is relative to the working directory.
dat = pd.read_csv("data_politifact.csv")

In [None]:
# Keep only the rows whose verdict is exactly 'true' or 'false'.
verdict_mask = dat['verdict'].isin(['true', 'false'])
dat = dat.loc[verdict_mask]

In [None]:
# Keep only the rows whose statement_source is 'speech'. `.copy()` makes `dat` an
# independent frame, so adding new columns to it later does not raise
# SettingWithCopyWarning (the boolean-indexed result is otherwise tracked as a slice).
dat = dat[dat['statement_source'].isin(['speech'])].copy()

In [None]:
# Spot-check one claim: the second row (by position) of the filtered frame.
dat['statement'].iloc[1]

'Says he lived in a colonia while working as a farm worker.'

In [None]:
def triple(paragraph, model="gpt-4-1106-preview"):
    """Extract `subject | predicate | object` triples from a paragraph with a chat model.

    Sends one chat-completion request through the module-level `client` and
    returns the reply text; the prompt asks for one triple per line.

    Parameters
    ----------
    paragraph : str
        Text to extract triples from. It is interpolated into the prompt as-is.
    model : str, optional
        Chat model name passed to the API. Defaults to the model this notebook
        was originally run with.

    Returns
    -------
    str
        The content of the first choice in the response, or an empty string
        when `paragraph` is missing (None / NaN) or blank, in which case no
        request is made.
    """
    # Missing cells arrive as None or float NaN when this is applied to a
    # DataFrame column; do not spend an API call on them (NaN != NaN).
    if paragraph is None or (isinstance(paragraph, float) and paragraph != paragraph):
        return ""
    if not str(paragraph).strip():
        return ""

    # The backslash continuations sit inside the string literal, so the prompt
    # is sent as a single line; its text is kept exactly as originally written.
    prompt = f"From the paragraph below, extract a set of triples to construct a graph. Each triple needs to be in the following format: subject | predicate | object. \
                          Paragraph: {paragraph} \
                          In the output, please directly provide the output without any beginning sentence. Avoid contraction in the output. Start a new line for each triple."

    completion = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    # Return the model's response
    return completion.choices[0].message.content

In [None]:
# Display the frame after both filters (verdict and statement_source).
dat

Unnamed: 0,verdict,statement_originator,statement,statement_date,statement_source,factchecker,factcheck_date,factcheck_analysis_link,text
0,true,Barack Obama,John McCain opposed bankruptcy protections for...,6/11/2008,speech,Adriel Bettelheim,6/16/2008,https://www.politifact.com/factchecks/2008/jun...,Trying to portray his opponent as insensitive ...
31,false,David Alameel,Says he lived in a colonia while working as a ...,6/28/2014,speech,W. Gardner Selby,7/2/2014,https://www.politifact.com/factchecks/2014/jul...,"David Alameel, the Democratic nominee challeng..."
93,false,Georgia state senators,Say easing restrictions on selling alcohol on ...,3/16/2011,speech,Eric Stirgus,3/18/2011,https://www.politifact.com/factchecks/2011/mar...,Some Georgia senators opposed to a controversi...
108,true,Jeff Kottkamp,"""I've taken on the federal government and won.""",7/31/2010,speech,Aaron Sharockman,8/11/2010,https://www.politifact.com/factchecks/2010/aug...,"Lt. Gov. Jeff Kottkamp, a Republican running f..."
122,false,Chris Christie,Says the national health care law puts federal...,8/28/2012,speech,Erin O'Neill,8/30/2012,https://www.politifact.com/factchecks/2012/aug...,The truth can be difficult.So difficult that e...
...,...,...,...,...,...,...,...,...,...
21125,false,Rick Santorum,"""President Obama once said he wants everybody ...",2/25/2012,speech,Louis Jacobson,2/27/2012,https://www.politifact.com/factchecks/2012/feb...,Republican presidential candidate Rick Santoru...
21128,true,Barack Obama,During Lyndon B. Johnson’s first 20 years in C...,4/10/2014,speech,W. Gardner Selby,4/14/2014,https://www.politifact.com/factchecks/2014/apr...,President Lyndon B. Johnson of Texas was laude...
21129,true,Leticia Van de Putte,"Dan Patrick was the only state senator ""who vo...",6/27/2014,speech,W. Gardner Selby,7/3/2014,https://www.politifact.com/factchecks/2014/jul...,Democrat Leticia Van de Putte told her party’s...
21136,false,David Dewhurst,"""Phoenix, Arizona, I'm told, is now the No. 2 ...",6/11/2010,speech,Ciara O'Rourke,6/18/2010,https://www.politifact.com/factchecks/2010/jun...,Curbing illegal immigration was a leading war ...


In [None]:
# Run triple extraction on every claim in the `statement` column.
# `triple` issues one chat-completion request per row, so this cell is slow.
statement_triple = dat['statement'].progress_apply(triple)

Processing statements:   0%|          | 0/1186 [00:00<?, ?it/s]

In [None]:
# Preview the extracted statement triples (one newline-separated string per row).
statement_triple

0        John McCain | opposed | bankruptcy protections...
31       he | lived in | a colonia\nhe | worked as | a ...
93       easing restrictions | results in | more traffi...
108      I | have taken on | the federal government\nI ...
122      national health care law | puts | bureaucrats\...
                               ...                        
21125    President Obama | once said | he wants everybo...
21128    Lyndon B. Johnson | tenure in Congress | first...
21129    Dan Patrick | voted against | Veteran Entrepre...
21136    Phoenix, Arizona | is | No. 2 kidnapping capit...
21144    health care costs | have been driven down | to...
Name: statement, Length: 1186, dtype: object

In [None]:
# Print the triples for the row labelled 0 (a label lookup, not a positional one).
first_statement_triples = statement_triple.loc[0]
print(first_statement_triples)

John McCain | opposed | bankruptcy protections
bankruptcy protections | for | families
families | in bankruptcy | because of medical expenses
medical expenses | could not | pay


In [None]:
# Run triple extraction on every entry of the `text` column (one request per row).
# The progress bar still reads "Processing statements" because that description
# was set once, globally, by tqdm.pandas(...) in the import cell.
text_triple = dat['text'].progress_apply(triple)

Processing statements:   0%|          | 0/1186 [00:00<?, ?it/s]

In [None]:
# Preview the triples extracted from the `text` column.
text_triple

0        Sen. Barack Obama | portrayed | Sen. John McCa...
31       David Alameel | is | Democratic nominee\nDavid...
93       Georgia senators | opposed | controversial bil...
108      Lt. Gov. Jeff Kottkamp | is | a Republican run...
122      Gov. Chris Christie | said | Mitt Romney would...
                               ...                        
21125    Republican presidential candidate Rick Santoru...
21128    Lyndon B. Johnson | predecessor | President of...
21129    Leticia Van de Putte | affiliation | Democrat\...
21136    Republican Party of Texas | had leading war cr...
21144    Hillary Clinton | argued against | doing away ...
Name: text, Length: 1186, dtype: object

In [None]:
# Print the triples for the row labelled 0 (a label lookup, not a positional one).
first_text_triples = text_triple.loc[0]
print(first_text_triples)

Sen. Barack Obama | portrayed | Sen. John McCain as insensitive
Sen. John McCain | supported | 2005 law
2005 law | made | it more difficult for personal bankruptcy filers to escape debts
Sen. Barack Obama | used | June 11, 2008 speech
June 11, 2008 speech | to highlight | Sen. John McCain's support
Sen. Barack Obama | noted | Sen. John McCain's opposition
Sen. John McCain's opposition | was to | effort to exempt individuals with medical expenses
Sen. John McCain | sided with | credit card companies
Sen. Barack Obama | fought against | credit card industry's bankruptcy bill
credit card industry's bankruptcy bill | made | it harder for working families to climb out of debt
Sen. John McCain | supported | credit card industry's bankruptcy bill
Sen. John McCain | opposed | exempting families with bankruptcy due to medical expenses
2005 measure | created | means test
means test | designed to force | more consumers to file under Chapter 13
Chapter 7 | allows | individuals to have much debt er

In [None]:
# Attach the statement triples as a new column. Assignment aligns on the index,
# which `statement_triple` inherited from `dat['statement']`.
dat['statement_triples'] = statement_triple

In [None]:
# Attach the text triples as a new column. Assignment aligns on the index,
# which `text_triple` inherited from `dat['text']`.
dat['text_triples'] = text_triple

In [None]:
# Display the frame with the two new triple columns appended.
dat

Unnamed: 0,verdict,statement_originator,statement,statement_date,statement_source,factchecker,factcheck_date,factcheck_analysis_link,text,statement_triples,text_triples
0,true,Barack Obama,John McCain opposed bankruptcy protections for...,6/11/2008,speech,Adriel Bettelheim,6/16/2008,https://www.politifact.com/factchecks/2008/jun...,Trying to portray his opponent as insensitive ...,John McCain | opposed | bankruptcy protections...,Sen. Barack Obama | portrayed | Sen. John McCa...
31,false,David Alameel,Says he lived in a colonia while working as a ...,6/28/2014,speech,W. Gardner Selby,7/2/2014,https://www.politifact.com/factchecks/2014/jul...,"David Alameel, the Democratic nominee challeng...",he | lived in | a colonia\nhe | worked as | a ...,David Alameel | is | Democratic nominee\nDavid...
93,false,Georgia state senators,Say easing restrictions on selling alcohol on ...,3/16/2011,speech,Eric Stirgus,3/18/2011,https://www.politifact.com/factchecks/2011/mar...,Some Georgia senators opposed to a controversi...,easing restrictions | results in | more traffi...,Georgia senators | opposed | controversial bil...
108,true,Jeff Kottkamp,"""I've taken on the federal government and won.""",7/31/2010,speech,Aaron Sharockman,8/11/2010,https://www.politifact.com/factchecks/2010/aug...,"Lt. Gov. Jeff Kottkamp, a Republican running f...",I | have taken on | the federal government\nI ...,Lt. Gov. Jeff Kottkamp | is | a Republican run...
122,false,Chris Christie,Says the national health care law puts federal...,8/28/2012,speech,Erin O'Neill,8/30/2012,https://www.politifact.com/factchecks/2012/aug...,The truth can be difficult.So difficult that e...,national health care law | puts | bureaucrats\...,Gov. Chris Christie | said | Mitt Romney would...
...,...,...,...,...,...,...,...,...,...,...,...
21125,false,Rick Santorum,"""President Obama once said he wants everybody ...",2/25/2012,speech,Louis Jacobson,2/27/2012,https://www.politifact.com/factchecks/2012/feb...,Republican presidential candidate Rick Santoru...,President Obama | once said | he wants everybo...,Republican presidential candidate Rick Santoru...
21128,true,Barack Obama,During Lyndon B. Johnson’s first 20 years in C...,4/10/2014,speech,W. Gardner Selby,4/14/2014,https://www.politifact.com/factchecks/2014/apr...,President Lyndon B. Johnson of Texas was laude...,Lyndon B. Johnson | tenure in Congress | first...,Lyndon B. Johnson | predecessor | President of...
21129,true,Leticia Van de Putte,"Dan Patrick was the only state senator ""who vo...",6/27/2014,speech,W. Gardner Selby,7/3/2014,https://www.politifact.com/factchecks/2014/jul...,Democrat Leticia Van de Putte told her party’s...,Dan Patrick | voted against | Veteran Entrepre...,Leticia Van de Putte | affiliation | Democrat\...
21136,false,David Dewhurst,"""Phoenix, Arizona, I'm told, is now the No. 2 ...",6/11/2010,speech,Ciara O'Rourke,6/18/2010,https://www.politifact.com/factchecks/2010/jun...,Curbing illegal immigration was a leading war ...,"Phoenix, Arizona | is | No. 2 kidnapping capit...",Republican Party of Texas | had leading war cr...


In [None]:
# Persist the frame including both triple columns. The index is written as well
# (to_csv default), so the row labels become the first, unnamed column of the file.
dat.to_csv("politifact_triples.csv")