In [1]:
import re
import requests

import pandas as pd
import json

from sklearn.metrics import precision_recall_fscore_support as score
from sklearn.metrics import precision_score, recall_score
from tqdm.auto import tqdm



In [2]:
def analyze_text(text: str, timeout: float = 60.0):
    """Send ``text`` to the grammar web service and return its decoded JSON reply.

    Args:
        text: The text to submit; it is sent with ``"language": "et"``.
        timeout: Seconds to wait for the service before giving up.

    Returns:
        The parsed JSON body of the HTTP response.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.Timeout: If the service does not respond within ``timeout``.
    """
    analyzer_query = "https://api.tartunlp.ai/grammar"
    request_body = {
        "language": "et",
        "text": text,
    }
    # Without a timeout, requests waits indefinitely and a stalled service
    # would hang the whole notebook run.
    response = requests.post(analyzer_query, json=request_body, timeout=timeout)
    # Explicit status check instead of `assert`: asserts are stripped under
    # `python -O`, and HTTPError carries the status code and URL.
    response.raise_for_status()
    return response.json()

In [3]:
# Load the evaluation samples from the semicolon-separated, UTF-8 encoded CSV.
testsamples = pd.read_csv(
    "obl_gpt_input_large1.csv",
    sep=";",
    encoding="utf-8",
)

In [4]:
# Query the grammar service once per sentence; responses stay in row order.
# Iterating the column directly avoids building a full row Series through
# `iloc` on every step, and tqdm reports progress for this slow network loop.
answers = [
    analyze_text(sent)
    for sent in tqdm(testsamples["short_sent_untoken"], desc="grammar check")
]

In [5]:
# Keep the raw service responses next to the sentences they belong to.
testsamples["gpt_answers"] = answers

In [6]:
# Turn each response into a label: "yes" when the service returned no
# corrections, "no" when it returned at least one.
gpt_cat = [
    "no" if len(response["corrections"]) != 0 else "yes"
    for response in answers
]

In [7]:
# Store the derived yes/no labels as the prediction column.
testsamples["gpt"] = gpt_cat

In [8]:
# Predictions come from the 'gpt' column, reference labels from 'removetype'.
predicted = list(testsamples['gpt'])
y_test = list(testsamples['removetype'])

# Per-label metrics (one array entry per label). They get their own names so
# the scalar "yes" scores computed below do not silently overwrite them.
precision_per_class, recall_per_class, fscore, support = score(y_test, predicted)

print('precision: {}'.format(precision_per_class.round(3)))
print('recall: {}'.format(recall_per_class.round(3)))
print('fscore: {}'.format(fscore.round(3)))
print('support: {}'.format(support.round(3)))

# Precision/recall for the "yes" label only.
try:
    precision = precision_score(y_test, predicted, pos_label='yes', average='binary')
    recall = recall_score(y_test, predicted, pos_label='yes', average='binary')
except ValueError:
    # Only the error scikit-learn raises when average='binary' cannot be
    # applied to the targets is handled; a bare `except:` would also hide
    # unrelated failures such as KeyboardInterrupt or NameError.
    precision = precision_score(y_test, predicted, labels=['yes'], average='micro')
    recall = recall_score(y_test, predicted, labels=['yes'], average='micro')
print('precision: ',precision.round(3))
print('recall: ',recall.round(3))

precision: [0.535 0.537]
recall: [0.546 0.526]
fscore: [0.541 0.531]
support: [500 500]
precision:  0.537
recall:  0.526


In [9]:
# Flag rows where the annotated label and the predicted label agree.
testsamples["match"] = testsamples["removetype"] == testsamples["gpt"]

In [10]:
# How many predictions agree / disagree with the annotation.
testsamples.value_counts(subset="match")

match
True     536
False    464
Name: count, dtype: int64

In [11]:
# Distribution of the predicted yes/no labels.
testsamples.value_counts(subset="gpt")

gpt
no     510
yes    490
Name: count, dtype: int64

In [12]:
# Persist the samples with responses, predictions and match flags
# (semicolon-separated, UTF-8, without the index column).
testsamples.to_csv(
    "obl_grammar_input_large1_untoken_answers.csv",
    index=False,
    sep=";",
    encoding="utf-8",
)