In [27]:
from openai import OpenAI
import xml.etree.ElementTree as ET
from tqdm import tqdm

xml_file_path = '../data/source-papers.xml'
tree = ET.parse(xml_file_path)
root = tree.getroot()

title = root.find('.//atl').text #find returns first instance of title, abstract
abstract = root.find('.//ab').text

In [12]:
import pandas as pd
import xml.etree.ElementTree as ET

# Extract relevant data
data = []
for rec in root.findall('rec'):
    header = rec.find('header')
    result_id = rec.get('resultID')
    
    # Extract header attributes
    short_db_name = header.get('shortDbName')
    long_db_name = header.get('longDbName')
    ui_term = header.get('uiTerm')

    # Extract control info
    control_info = header.find('controlInfo')
    pubinfo = control_info.find('pubinfo')
    artinfo = control_info.find('artinfo')
    display_info = header.find('displayInfo')
    
    # Extracting various fields with error handling
    article_data = {
        'resultID': result_id,
        'shortDbName': short_db_name,
        'longDbName': long_db_name,
        'uiTerm': ui_term,
        'jtl': control_info.find('./jinfo/jtl').text if control_info.find('./jinfo/jtl') is not None else None,
        'issn': control_info.find('./jinfo/issn').text if control_info.find('./jinfo/issn') is not None else None,
        'publication_date': pubinfo.find('dt').text if pubinfo.find('dt') is not None else None,
        'volume': pubinfo.find('vid').text if pubinfo.find('vid') is not None else None,
        'issue': pubinfo.find('iid').text if pubinfo.find('iid') is not None else None,
        'doi': artinfo.find('ui').text if artinfo.find('ui') is not None else None,
        'ppct': artinfo.find('ppct').text if artinfo.find('ppct') is not None else None,
        'pages': artinfo.find('pages').text if artinfo.find('pages') is not None else None,
        'title': artinfo.find('./tig/atl').text if artinfo.find('./tig/atl') is not None else None,
        'authors': ', '.join([au.text for au in artinfo.findall('./aug/au')]) if artinfo.findall('./aug/au') else None,
        'affiliations': ', '.join([affil.text for affil in artinfo.findall('./aug/affil')]) if artinfo.findall('./aug/affil') else None,
        'subjects': ', '.join([su.text for su in artinfo.findall('./su')]) if artinfo.findall('./su') else None,
        'abstract': artinfo.find('ab').text if artinfo.find('ab') is not None else None,
        'publication_type': artinfo.find('pubtype').text if artinfo.find('pubtype') is not None else None,
        'language': control_info.find('./language').text if control_info.find('./language') is not None else None,
        'url': display_info.find('./pLink/url').text if display_info.find('./pLink/url') is not None else None
    }
    
    data.append(article_data)

# Create DataFrame
df = pd.DataFrame(data)

In [17]:
def generate_response(title, abstract):
    prompt = f"""
    Based on the title and abstract of the following research paper, evaluate its relevance to the topic 'linking environmental changes to health' on a scale from 0 (no relevance) to 10 (highly relevant). Please provide only a single number (from 0 to 10) as your response without any additional explanation.
    Title: {title}
    Abstract: {abstract}
    """

    completion = client.chat.completions.create(
    model="gpt-3.5-turbo-0125",
    messages=[
        {"role": "system", "content": "You are a research assistant."},
        {"role": "user", "content": prompt}
    ],
        max_tokens=10,  # output tokens cap
        temperature=0.2,  # low temp values means more deterministic
        n=2, # generates two responses to the prompt
        stop=["\n"] # stop generating at new line
    )
    
    return completion

In [25]:
df['response_0'] = None
df['response_1'] = None

In [29]:
completion

ChatCompletion(id='chatcmpl-AD9prVt7fm6Zxmheedq7gVGJW7q72', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='9', role='assistant', function_call=None, tool_calls=None, refusal=None)), Choice(finish_reason='stop', index=1, logprobs=None, message=ChatCompletionMessage(content='9', role='assistant', function_call=None, tool_calls=None, refusal=None))], created=1727699435, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=2, prompt_tokens=343, total_tokens=345, completion_tokens_details={'reasoning_tokens': 0}))

In [31]:
for index, row in tqdm(df.iterrows(), total=df.shape[0]):
    title = row['title']
    abstract = row['abstract']
    response = generate_response(title, abstract)
    df.at[index, 'response_0'] = response.choices[0].message.content
    df.at[index, 'response_1'] = response.choices[1].message.content

  2%|▏         | 44/2933 [00:25<27:57,  1.72it/s] 


KeyboardInterrupt: 

In [34]:
df

Unnamed: 0,resultID,shortDbName,longDbName,uiTerm,jtl,issn,publication_date,volume,issue,doi,...,title,authors,affiliations,subjects,abstract,publication_type,language,url,response_0,response_1
0,1,ecn,EconLit,2113840,American Economic Review: Insights,2640205X,20240901,6,3,10.1257/aeri.20230382,...,Dynamic Spending Responses to Wealth Shocks: E...,"Andersen, Asger Lau, Johannesen, Niels, Sherid...","Center for Economic Behavior and Inequality, U...","Consumer Economics: Empirical Analysis, Macroe...",How much and over what horizon do households a...,Journal Article,English,https://search.ebscohost.com/login.aspx?direct...,2,2
1,2,ecn,EconLit,2113839,American Economic Review: Insights,2640205X,20240901,6,3,10.1257/aeri.20230402,...,Repression and Repertoires,"Morris, Stephen, Shadmehr, Mehdi","MIT, U of North Carolina at Chapel Hill",Social Choice; Clubs; Committees; Associations...,We formalize Tilly's concept of repertoires of...,Journal Article,English,https://search.ebscohost.com/login.aspx?direct...,1,2
2,3,ecn,EconLit,2113838,American Economic Review: Insights,2640205X,20240901,6,3,10.1257/aeri.20230314,...,The Socioeconomic Distribution of Choice Quali...,"Handel, Benjamin, Kolstad, Jonathan, Minten, T...","U of California, Berkeley, U of California, Be...","Personal Income, Wealth, and Their Distributio...",We study how choice quality relates to socioec...,Journal Article,English,https://search.ebscohost.com/login.aspx?direct...,3,2
3,4,ecn,EconLit,2113837,American Economic Review: Insights,2640205X,20240901,6,3,10.1257/aeri.20230263,...,Central Bank Credibility and Fiscal Responsibi...,"Schreger, Jesse, Yared, Pierre, Zaratiegui, Em...","Columbia U, Columbia U, Columbia U",General Aggregative Models: Keynes; Keynesian;...,We consider a New Keynesian model with strateg...,Journal Article,English,https://search.ebscohost.com/login.aspx?direct...,0,0
4,5,ecn,EconLit,2113836,American Economic Review: Insights,2640205X,20240901,6,3,10.1257/aeri.20230353,...,Regulating Transformative Technologies,"Acemoglu, Daron, Lensman, Todd","MIT, MIT","Firm Behavior: Theory, Taxation and Subsidies:...",Transformative technologies like generative AI...,Journal Article,English,https://search.ebscohost.com/login.aspx?direct...,2,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2928,2929,ecn,EconLit,1873960,Journal of Political Economy,00223808,20201101,128,11,,...,Erratum: Chinese College Admissions and School...,"Chen, Yan, Kesten, Onur","U of Michigan and Tsinghua U, U of Sydney","Higher Education; Research Institutions, Educa...",,Journal Article,English,https://search.ebscohost.com/login.aspx?direct...,,
2929,2930,ecn,EconLit,1852541,Journal of Political Economy,00223808,20200501,128,5,,...,CEO Behavior and Firm Performance: Erratum,"Bandiera, Oriana, Hansen, Stephen, Prat, Andre...",London School of Economics and Political Scien...,"Firm Behavior: Empirical Analysis, Mergers; Ac...",,Journal Article,English,https://search.ebscohost.com/login.aspx?direct...,,
2930,2931,ecn,EconLit,1847979,International Journal of Political Economy,08911916,20200401,49,2,10.1080/08911916.2020.1778865,...,Julio Lopez Gallardo: Some Personal Recollections,"Sawyer, Malcolm",U of Leeds,History of Economic Thought: Individuals,,Journal Article,English,https://search.ebscohost.com/login.aspx?direct...,,
2931,2932,ecn,EconLit,1847976,International Journal of Political Economy,08911916,20200401,49,2,10.1080/08911916.2020.1778867,...,In Memoriam: Julio Lopez Gallardo 1941-2020,"Seccareccia, Mario",U of Ottawa,Obituaries,,Journal Article,English,https://search.ebscohost.com/login.aspx?direct...,,


In [35]:
(df.response_0 - df.response_1).mean()

TypeError: unsupported operand type(s) for -: 'str' and 'str'

In [21]:
completion

ChatCompletion(id='chatcmpl-AD9prVt7fm6Zxmheedq7gVGJW7q72', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='9', role='assistant', function_call=None, tool_calls=None, refusal=None)), Choice(finish_reason='stop', index=1, logprobs=None, message=ChatCompletionMessage(content='9', role='assistant', function_call=None, tool_calls=None, refusal=None))], created=1727699435, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=2, prompt_tokens=343, total_tokens=345, completion_tokens_details={'reasoning_tokens': 0}))

In [24]:
345 * 2800

966000

In [23]:
345 * (0.0015/1000) * 2800

1.449

In [18]:
# environmental effects on health

completion = generate_response(df.iloc[830].title, df.iloc[830].abstract)

print(completion.choices[0].message)


ChatCompletionMessage(content='9', role='assistant', function_call=None, tool_calls=None, refusal=None)


In [20]:
[completion.choices[0].message.content, completion.choices[1].message.content]

['9', '9']

In [3]:




response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo-0125",
    messages=[
        {"role": "system", "content": ""},
        {"role": "user", "content": prompt}
    ],
    max_tokens=10,  # output tokens cap
    temperature=0.2,  # low temp values means more deterministic
    n=2, # generates two responses to the prompt
    stop=["\n"] # stop generating at new line
)

print("Response from OpenAI API:")
print(response['choices'][0]['message']['content'])


APIRemovedInV1: 

You tried to access openai.ChatCompletion, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.

You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 

Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`

A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742
