# Grammaticality judgements of Estonian sentences with ChatGPT

In [1]:
import openai
from openai import AzureOpenAI
import configparser

from pandas import read_excel
from pandas import DataFrame
from tqdm.auto import tqdm 
from math import isnan
import pandas as pd
import json

from sklearn.metrics import precision_recall_fscore_support as score
from sklearn.metrics import precision_score, recall_score



## I. Set up the Azure OpenAI API (openai version 1.35.10)

In [2]:
# Read the Azure OpenAI settings from azure.ini in the working directory.
config = configparser.ConfigParser()
status = config.read('azure.ini')
# ConfigParser.read() returns the list of files it parsed and silently skips
# files it cannot open, so check the result explicitly and fail early.
assert status == ['azure.ini'], (
    "azure.ini was not found or could not be read from the working directory"
)

In [3]:
# All settings live in the [azure-configuration] section of the parsed ini file.
azure_cfg = config['azure-configuration']

# Deployment name passed as `model` on every request, and the configured model name.
DEPLOIMENT_ID = azure_cfg['deployment_id']
GPT_MODEL = azure_cfg['model']

# Shared Azure OpenAI client used by the request helper below.
client = AzureOpenAI(
    api_key=azure_cfg['api_key'],
    api_version=azure_cfg['api_version'],
    azure_endpoint=azure_cfg['api_base'],
)

In [5]:
def ask_openai(prompt: str) -> str:
    """Send ``prompt`` as a single user message and return the reply text.

    The request is issued through the module-level ``client`` against the
    deployment named by ``DEPLOIMENT_ID``.

    Args:
        prompt: Full text of the user message.

    Returns:
        The text of the first choice. The sentinel string ``'FAIL'`` is
        returned instead when the request raises (the exception is printed),
        when the response contains no choices, or when the first choice has
        no text content.
    """
    try:
        response = client.chat.completions.create(
            model=DEPLOIMENT_ID,
            messages=[{"role": "user", "content": prompt}],
        )
        # Read the reply inside the try block so that an empty `choices` list
        # is reported as a failure instead of raising in the caller's loop.
        content = response.choices[0].message.content
    except Exception as e:
        # Deliberately best-effort: one rejected prompt (e.g. a content-filter
        # 400) must not abort a long batch run.
        print(e)
        return 'FAIL'
    # The SDK types `content` as optional; callers expect a string.
    if content is None:
        return 'FAIL'
    return content

In [32]:
# Evaluation set: semicolon-separated, UTF-8 encoded CSV.
testsamples = pd.read_csv(
    "obl_gpt_input_large1.csv",
    sep=';',
    encoding='utf-8',
)

In [33]:
# Prompt template; {sentence} is filled in with one sentence per request.
prompt = (
    "Is the following Estonian sentence delimited by triple backticks grammatically correct: ```{sentence}```\n"
    'Ignore punctuation and capitalization. Answer “yes” or “no".'
)

# One normalized label per row of `testsamples`, in row order.
answers = []
for sent in tqdm(testsamples["short_sent"], desc="short_sent"):
    # ask_openai signals a failed request with 'FAIL'; treat a missing or
    # empty reply the same way so the normalization below never sees None.
    reply = ask_openai(prompt.format(sentence=sent)) or 'FAIL'
    # Strip surrounding whitespace BEFORE cutting to three characters,
    # otherwise a reply such as " Yes" would be truncated to "ye".
    # The cut keeps "yes" / "no," / "no." / "fai"; commas and periods are
    # then removed and the label is lower-cased.
    label = reply.strip()[:3].replace(",", "").replace(".", "").strip().lower()
    answers.append(label)

Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': True, 'severity': 'medium'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}
Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param

In [34]:
# Store the normalized model answers as column 'gpt'; `answers` was built by
# walking the rows in order, so it lines up with `testsamples` row by row.
testsamples['gpt'] = answers

In [35]:
# Model labels versus the annotation in column 'removetype'.
predicted = list(testsamples['gpt'])
y_test = list(testsamples['removetype'])

# Per-label scores: one array entry for every label that occurs in either
# list, so a label produced only by failed requests gets its own entry.
precision, recall, fscore, support = score(y_test, predicted)

print('precision: {}'.format(precision.round(3)))
print('recall: {}'.format(recall.round(3)))
print('fscore: {}'.format(fscore.round(3)))
print('support: {}'.format(support.round(3)))

# Scores for the 'yes' label alone.
try:
    precision = precision_score(y_test, predicted, pos_label='yes', average='binary')
    recall = recall_score(y_test, predicted, pos_label='yes', average='binary')
except ValueError:
    # average='binary' is rejected when more than two labels are present;
    # restricting a micro average to the single label 'yes' yields that
    # label's precision and recall. Only ValueError is caught so that
    # interrupts and unrelated errors are not silently swallowed.
    precision = precision_score(y_test, predicted, labels=['yes'], average='micro')
    recall = recall_score(y_test, predicted, labels=['yes'], average='micro')
print('precision: ',precision.round(3))
print('recall: ',recall.round(3))

precision: [0.    0.532 0.541]
recall: [0.    0.578 0.492]
fscore: [0.    0.554 0.515]
support: [  0 500 500]
precision:  0.541
recall:  0.492


  _warn_prf(average, modifier, msg_start, len(result))


In [36]:
# Row-wise agreement flag: True where the annotation and the model label coincide.
testsamples["match"] = testsamples["removetype"] == testsamples["gpt"]

In [37]:
# Number of rows where the model label agrees (True) or disagrees (False) with the annotation.
testsamples.value_counts("match")

match
True     535
False    465
Name: count, dtype: int64

In [38]:
# Distribution of the model's labels; 'fai' is the 'FAIL' sentinel of a failed
# request after truncation to three characters and lower-casing.
testsamples.value_counts("gpt")

gpt
no     543
yes    455
fai      2
Name: count, dtype: int64

In [39]:
# Save the input rows together with the added columns, in the same
# semicolon-separated UTF-8 format as the input file.
testsamples.to_csv(
    "obl_gpt_input_large1_answers.csv",
    index=False,
    sep=';',
    encoding='utf-8',
)

### Also test on untokenized sentences

In [41]:
# Re-read the input file so this experiment starts from a fresh frame.
testsamples = pd.read_csv(
    "obl_gpt_input_large1.csv",
    sep=';',
    encoding='utf-8',
)
testsamples.head()

Unnamed: 0,id,fpath,sentence,remove_start,remove_end,removed,type,short_sent,cons_score,ual,la,removetype,short_sent_untoken
0,1051,Train/tea_eesti_arst_2004_osa_3_ud211.conllu,Seda ka kohe-kohe algavatel järjekordsetel pal...,107,127,peale haiglate liidu,free,Seda ka kohe-kohe algavatel järjekordsetel pal...,58.8,64.7,82.4,yes,Seda ka kohe-kohe algavatel järjekordsetel pal...
1,1389,Train/aja_luup200106_osa_2_ud211.conllu,"Kuna eestlased hindavad oma kodu kõrgelt , sii...",204,215,elamispinda,free,"Kuna eestlased hindavad oma kodu kõrgelt , sii...",20.0,23.3,33.3,yes,"Kuna eestlased hindavad oma kodu kõrgelt, siis..."
2,1109,Train/aja_pm20000218_osa_5_ud211.conllu,"Järgnevalt anti Marsile kiiresti mitu käsku , ...",16,23,Marsile,free,"Järgnevalt anti kiiresti mitu käsku , lootuses...",86.7,86.7,96.7,yes,"Järgnevalt anti kiiresti mitu käsku, lootuses,..."
3,398,Train/aja_luup200202_osa_1_ud211.conllu,"Enne minekut hoiatab ta oma leebel moel , et ä...",59,66,haiglas,free,"Enne minekut hoiatab ta oma leebel moel , et ä...",100.0,100.0,100.0,yes,"Enne minekut hoiatab ta oma leebel moel, et är..."
4,2346,Train/arborest_ud211.conllu,Peeter kargas läbi akna aeda kuue järele .,24,28,aeda,free,Peeter kargas läbi akna kuue järele .,100.0,100.0,100.0,yes,Peeter kargas läbi akna kuue järele.


In [42]:
# Prompt template; {sentence} is filled in with one sentence per request.
prompt = (
    "Is the following Estonian sentence delimited by triple backticks grammatically correct: ```{sentence}```\n"
    'Ignore punctuation and capitalization. Answer “yes” or “no".'
)

# One normalized label per row of `testsamples`, in row order.
answers = []
for sent in tqdm(testsamples["short_sent_untoken"], desc="short_sent_untoken"):
    # ask_openai signals a failed request with 'FAIL'; treat a missing or
    # empty reply the same way so the normalization below never sees None.
    reply = ask_openai(prompt.format(sentence=sent)) or 'FAIL'
    # Strip surrounding whitespace BEFORE cutting to three characters,
    # otherwise a reply such as " Yes" would be truncated to "ye".
    # The cut keeps "yes" / "no," / "no." / "fai"; commas and periods are
    # then removed and the label is lower-cased.
    label = reply.strip()[:3].replace(",", "").replace(".", "").strip().lower()
    answers.append(label)

Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': True, 'severity': 'medium'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}
Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param

In [43]:
# Store the normalized model answers as column 'gpt'; `answers` was built by
# walking the rows in order, so it lines up with `testsamples` row by row.
testsamples['gpt'] = answers

In [44]:
# Model labels versus the annotation in column 'removetype'.
predicted = list(testsamples['gpt'])
y_test = list(testsamples['removetype'])

# Per-label scores: one array entry for every label that occurs in either
# list, so a label produced only by failed requests gets its own entry.
precision, recall, fscore, support = score(y_test, predicted)

print('precision: {}'.format(precision.round(3)))
print('recall: {}'.format(recall.round(3)))
print('fscore: {}'.format(fscore.round(3)))
print('support: {}'.format(support.round(3)))

# Scores for the 'yes' label alone.
try:
    precision = precision_score(y_test, predicted, pos_label='yes', average='binary')
    recall = recall_score(y_test, predicted, pos_label='yes', average='binary')
except ValueError:
    # average='binary' is rejected when more than two labels are present;
    # restricting a micro average to the single label 'yes' yields that
    # label's precision and recall. Only ValueError is caught so that
    # interrupts and unrelated errors are not silently swallowed.
    precision = precision_score(y_test, predicted, labels=['yes'], average='micro')
    recall = recall_score(y_test, predicted, labels=['yes'], average='micro')
print('precision: ',precision.round(3))
print('recall: ',recall.round(3))

precision: [0.    0.581 0.546]
recall: [0.    0.408 0.706]
fscore: [0.    0.479 0.616]
support: [  0 500 500]
precision:  0.546
recall:  0.706


  _warn_prf(average, modifier, msg_start, len(result))


In [45]:
# Row-wise agreement flag: True where the annotation and the model label coincide.
testsamples["match"] = testsamples["removetype"] == testsamples["gpt"]

In [46]:
# Number of rows where the model label agrees (True) or disagrees (False) with the annotation.
testsamples.value_counts("match")

match
True     557
False    443
Name: count, dtype: int64

In [47]:
# Distribution of the model's labels; 'fai' is the 'FAIL' sentinel of a failed
# request after truncation to three characters and lower-casing.
testsamples.value_counts("gpt")

gpt
yes    647
no     351
fai      2
Name: count, dtype: int64

In [48]:
# Save the input rows together with the added columns, in the same
# semicolon-separated UTF-8 format as the input file.
testsamples.to_csv(
    "obl_gpt_input_large1_untoken_answers.csv",
    index=False,
    sep=';',
    encoding='utf-8',
)