# Antecedent presence in the sentence

In [1]:
import sys
# Put the parent directory on the import path.
# NOTE(review): presumably this is where the local `llm_library` package
# (imported in the next cell) lives — confirm against the repository layout.
sys.path.append("../")

In [2]:
import configparser
from math import isnan
from pathlib import Path

import openai
from pandas import DataFrame
from pandas import read_csv
from tqdm.auto import tqdm

from llm_library.openai import ask_openai
from llm_library.openai import configure_azure_model

## I. Set up Azure API 

In [3]:
# Parse the Azure settings file. ConfigParser.read() returns the list of
# files it parsed successfully, so comparing against the single expected
# path fails fast when the file is missing or unreadable.
config_path = '../model_configurations/azure.ini'
config = configparser.ConfigParser()
status = config.read(config_path)
assert status == [config_path]

In [4]:
# Build the model configuration from the parsed settings and report which
# model is going to be queried.
gpt_conf = configure_azure_model(config)
print(f"Going to use model {gpt_conf.model_name}")

# Smoke test: send one short question before starting the long run.
# NOTE(review): stop='.' is presumably a stop sequence that cuts the reply
# at the first full stop — confirm against llm_library.openai.ask_openai.
test_question = 'What is time?'
print("Test", f"Q: {test_question}", sep="\n")
test_answer = ask_openai(gpt_conf, prompt=test_question, stop='.')
print(f"A: {test_answer}")

Going to use model gpt-35-turbo
Test
Q: What is time?
A: Time is a fundamental concept in physics and philosophy that refers to the sequence of events and the ongoing, irreversible flow from past to present to future


## II. Use ChatGPT to test whether the anaphora antecedent is in the sentence

In [5]:
# Load the pronoun occurrences to assess and preview the first rows.
# In the recorded preview the pronoun under consideration is wrapped in
# `**` markers inside `pronoun_sentence`.
# NOTE(review): the preview also shows an 'Unnamed: 0' column, which looks
# like a previously saved index — confirm whether `index_col=0` is wanted.
tbl = read_csv('input_data/anaphora_presence.csv')
tbl.head()

Unnamed: 0,index,label,pronoun,pronoun_lemma,pronoun_sentence
0,1,1,neile,['see'],Inimeste kaasamine ettevõtte tuleviku planeeri...
1,2,1,nende,['tema'],Inimeste kaasamine ettevõtte tuleviku planeeri...
2,3,2,ma,['mina'],"Ei tea , kas **ma** lugesin seda Vindi raama..."
3,4,2,seda,['see'],"Ei tea , kas ma lugesin **seda** Vindi raama..."
4,5,2,see,['see'],"Ei tea , kas ma lugesin seda Vindi raamatust ,..."


### Presence in the sentence

In [6]:
# Prompt template with `{pronoun}` and `{sentence}` placeholders, filled in
# with str.format() for every row. The model is asked for a JSON answer of
# "yes", "no" or "other" (the latter when the word is not an anaphora).
prompt = "\n".join([
    "Is the explicit antecedent for the anaphora '{pronoun}' in this Estonian sentence:",
    "'{sentence}'",
    'Answer in JSON format with either "yes" ,"no" or "other" when the provided word is not an anaphora.',
])

In [7]:
# Working table: one row per pronoun occurrence, plus an empty `assessment`
# column that receives the model's answer. The column is created under the
# same name the loop writes to; the earlier misspelling ('assesment') left a
# spurious all-None column in the saved output.
result = (
    tbl[['pronoun', 'pronoun_sentence']]
    .reset_index(names='index')
    .assign(assessment=None)
)

# Query the model once per row. Fields are read by column name rather than
# by tuple position, so the loop does not depend on column count or order.
# Answers are written back row by row, so whatever was collected stays in
# `result` if a request fails part-way through.
for i, row in tqdm(result.iterrows(), total=len(result)):
    # Strip the `**` markers that highlight the pronoun in the sentence.
    clean_sentence = row['pronoun_sentence'].replace('**', '')
    full_prompt = prompt.format(sentence=clean_sentence, pronoun=row['pronoun'])
    result.loc[i, 'assessment'] = ask_openai(gpt_conf, full_prompt)

  0%|          | 0/4000 [00:00<?, ?it/s]

In [8]:
# Save the assessments under a per-model directory. The directory is created
# first because DataFrame.to_csv raises OSError when the parent directory is
# missing, which would discard the results of the long API loop above.
output_path = Path(f"output_data/{gpt_conf.model_name}/anaphora_antecedent_is_in_sentence.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
result.to_csv(output_path)