# Grammatical correctness of questions  

In [1]:
# Append the parent directory to the module search path.
# Presumably this is what makes the project-local `llm_library` package
# importable from this notebook's folder — confirm against the repo layout.
import sys
sys.path.append("../")

In [2]:
import openai
import configparser

from llm_library.openai import configure_azure_model
from llm_library.openai import ask_openai

from pandas import read_csv
from pandas import DataFrame
from tqdm.auto import tqdm 
from math import isnan

## I. Set up Azure API 

In [3]:
# Load the Azure settings. ConfigParser.read() returns the list of files it
# parsed successfully, so comparing against the requested path catches a
# missing or unreadable configuration file right away.
config_path = '../model_configurations/azure.ini'
config = configparser.ConfigParser()
status = config.read(config_path)
assert status == [config_path]

In [4]:
# Build the model configuration from the parsed ini file and report which
# deployment is going to be queried.
gpt_conf = configure_azure_model(config)
print(f"Going to use model {gpt_conf.model_name}")

# Smoke test: one short request (cut off at the first '.') to verify that the
# API is reachable before the long-running loops below are started.
print("Test")
print("Q: What is time?")
test_answer = ask_openai(gpt_conf, prompt='What is time?', stop='.')
print(f"A: {test_answer}")

Going to use model gpt-35-turbo
Test
Q: What is time?
A: Time is a concept that helps us measure and order events


## II. Test validity of questions with ChatGPT

In [5]:
# Load the table of questions to be assessed. The cells below read its
# 'pronoun_sentence', 'question' and 'question_fix' columns.
tbl = read_csv('input_data/revealing_questions.csv')
# Preview the first rows to check that the file was parsed as expected.
tbl.head()

Unnamed: 0,index,label,pronoun,pronoun_lemma,pronoun_sentence,question,question_fix
0,0,1,neile,['see'],Inimeste kaasamine ettevõtte tuleviku planeeri...,kellele tagasiside andmine ja nende arendamine ?,kellele tagasiside andmine ?
1,1,1,nende,['tema'],Inimeste kaasamine ettevõtte tuleviku planeeri...,kelle arendamine ?,
2,2,1,kes,"['kes', 'kes']","1 ) Eesti kodaniku abikaasat , **kes** taotl...",kes taotleb elamisluba käesoleva seaduse § 12 ...,
3,3,1,millega,"['mis', 'mis']",Viimasel etapil hoidis tiirudes kõik märgid ma...,millega Eesti teenis 10 MK punkti ?,
4,4,1,nad,['tema'],"Õnneks mehed siiski päästeti , sest **nad** ...",kes taipasid kiiresti telefonist SIM-kaardi ee...,


### Constructed question

In [6]:
# Prompt template for the context-free check; `{question}` is filled in per row.
# NOTE(review): the quotation marks around “no" are mismatched in the prompt
# text. It is left untouched so results stay comparable with earlier runs.
prompt = (
    "Is the following question in Estonian grammatically and syntactically correct:\n'{question}'\n"
    'Answer in JSON format either “yes” or “no". Ignore the punctuation and capitalization.'
)

# Pre-create the `assessment` column under exactly the name the loop writes to,
# so the saved CSV contains a single assessment column.
result = tbl[['question']].reset_index(names='index').assign(assessment=None)

# reset_index() leaves a fresh 0..n-1 index on `result`, so the enumerate
# position is also a valid .loc row label.
for i, question in tqdm(enumerate(result['question']), total=len(result)):
    result.loc[i, 'assessment'] = ask_openai(gpt_conf, prompt.format(question=question))

  0%|          | 0/1071 [00:00<?, ?it/s]

In [7]:
# Persist the assessments of the constructed questions in a model-specific folder.
# NOTE(review): the file name ends with a trailing underscore ("..._questions_.csv"),
# unlike the other outputs of this notebook — confirm this is intended.
result.to_csv(f"output_data/{gpt_conf.model_name}/validity_of_constructed_questions_.csv")

### Manually corrected question

In [8]:
# Working frame for the manually corrected questions. The placeholder column is
# named `assessment` so that it is the same column the assessment loop fills in.
result = tbl[['question_fix']].reset_index(names='index').assign(assessment=None)

In [9]:
# Query the model for every manually corrected question. Rows whose
# 'question_fix' value is not a string (presumably NaN where no correction was
# entered) are skipped and keep their empty assessment.
for row_label, fixed_question in tqdm(enumerate(result['question_fix']), total=len(result)):
    if isinstance(fixed_question, str):
        filled_prompt = prompt.format(question=fixed_question)
        result.loc[row_label, 'assessment'] = ask_openai(gpt_conf, filled_prompt)

  0%|          | 0/1071 [00:00<?, ?it/s]

In [10]:
# Persist the assessments of the manually corrected questions in a model-specific folder.
result.to_csv(f"output_data/{gpt_conf.model_name}/validity_of_manually_corrected_questions.csv")

## III. Test whether each question is valid for its source sentence with ChatGPT

In [11]:
# Prompt template for the context-aware check: both the source sentence and the
# question are substituted in with str.format.
# NOTE: this rebinds the name `prompt` that the section II loops also use. After
# running this cell, re-run the section II prompt definition before re-running
# those loops; otherwise they would format this template without `sentence`.
prompt = (
    "Considering the provided Estonian sentence:\n'{sentence}'\n"
    "is the following question grammatically and syntactically correct:\n'{question}'\n"
    'Answer in JSON format either “yes” or “no". Ignore the punctuation and capitalization.'
)

### Constructed question

In [None]:
# Working frame for the context-aware check of the constructed questions. The
# placeholder column carries the same name (`assessment`) the loop writes to,
# so the saved CSV has a single assessment column.
result = tbl[['pronoun_sentence', 'question']].reset_index(names='index').assign(assessment=None)

# Iterate over just the two columns that are needed instead of unpacking whole
# rows, so the loop does not depend on the exact number of columns in `result`.
# reset_index() leaves a 0..n-1 index, so the enumerate position is a valid
# .loc row label.
sentence_question_pairs = zip(result['pronoun_sentence'], result['question'])
for i, (sentence, question) in tqdm(enumerate(sentence_question_pairs), total=len(result)):
    # Strip the '**' markers (seen around the pronoun in the preview of `tbl`)
    # before the sentence is placed into the prompt.
    full_prompt = prompt.format(sentence=sentence.replace('**', ''), question=question)
    result.loc[i, 'assessment'] = ask_openai(gpt_conf, full_prompt)

In [None]:
# Persist the context-aware assessments of the constructed questions in a model-specific folder.
result.to_csv(f"output_data/{gpt_conf.model_name}/validity_of_constructed_questions_with_sentence_context.csv")

### Manually corrected question

In [16]:
# Working frame for the context-aware check of the manually corrected
# questions. The placeholder column carries the same name (`assessment`) the
# loop writes to, so the saved CSV has a single assessment column.
result = tbl[['pronoun_sentence', 'question_fix']].reset_index(names='index').assign(assessment=None)

# Iterate over just the two columns that are needed instead of unpacking whole
# rows, so the loop does not depend on the exact number of columns in `result`.
# reset_index() leaves a 0..n-1 index, so the enumerate position is a valid
# .loc row label.
sentence_question_pairs = zip(result['pronoun_sentence'], result['question_fix'])
for i, (sentence, question) in tqdm(enumerate(sentence_question_pairs), total=len(result)):
    # Rows without a manual correction hold a non-string value and are skipped.
    if not isinstance(question, str):
        continue
    # Strip the '**' markers (seen around the pronoun in the preview of `tbl`)
    # before the sentence is placed into the prompt.
    full_prompt = prompt.format(sentence=sentence.replace('**', ''), question=question)
    result.loc[i, 'assessment'] = ask_openai(gpt_conf, full_prompt)

  0%|          | 0/1071 [00:00<?, ?it/s]

In [18]:
# Persist the context-aware assessments of the manually corrected questions in a model-specific folder.
result.to_csv(f"output_data/{gpt_conf.model_name}/validity_of_manually_corrected_questions_with_sentence_context.csv")