This notebook evaluates the automatic detection tools against human annotations.

In [None]:
import pandas as pd
import ast
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, confusion_matrix

In [None]:
# Config
# Both values are interpolated into the annotated file name; `modification`
# also selects which automated-keep rule is applied in the confusion-matrix
# cell ('prepositions' or 'AAE').
modification = "prepositions"
model = "deepseek"

In [None]:
# Paths
# Folder holding the paraphrase data; the trailing slash is required because
# the file name is appended by plain string formatting.
DATA_FOLDER = "./data/paraphrases/"
# Annotated spreadsheet for the configured modification / model pair.
ANNOTATED_FILE = f"{DATA_FOLDER}Gender_identity_{modification}_{model}_annotated.xlsx"

In [None]:
# Load the annotated spreadsheet into a DataFrame.
annotated_df = pd.read_excel(io=ANNOTATED_FILE)

In [None]:
# Filtering out sentences with no modification
# Report how many rows are dropped before the frame is overwritten.
print("No modification", (annotated_df.nb_modif == 0).sum())
# .copy() makes the filtered frame independent of the loaded one, so the
# column assignments done in the confusion-matrix cell do not raise
# SettingWithCopyWarning.
annotated_df = annotated_df[annotated_df.nb_modif != 0].copy()

In [None]:
# Share of rows whose `keep` flag is set.
keep_rate = sum(annotated_df.keep) / len(annotated_df)
print("Avg performance", round(keep_rate, 4))

# For each error column, count the rows where the column is filled in.
for error_label, error_column in (
    ("Wrong modif errors", "wrong_modif"),
    ("Realism errors", "realism"),
    ("Meaning errors", "meaning"),
):
    print(error_label, annotated_df[error_column].notna().sum())

print("Avg number of modif", round(annotated_df.nb_modif.mean(), 2))

In [None]:
# Similarity metrics analysis
# Print descriptive statistics for each similarity score column in turn.
for similarity_column in ("rouge_l", "bert_score", "sbert_score"):
    print(annotated_df[similarity_column].describe())

In [None]:
#Confusion matrix
# Builds the `automated_keep` column according to the configured
# `modification`, then cross-tabulates it against the human `keep` column.

def _parse_list_cell(cell):
    """Parse a cell with `ast.literal_eval` unless it is already a list.

    The list check makes this cell safe to re-run: after the first run the
    columns already hold parsed lists, and `ast.literal_eval` raises
    `ValueError` when given a list instead of a string.
    """
    return cell if isinstance(cell, list) else ast.literal_eval(cell)


if modification=='prepositions':
    for list_column in ("wrong_added", "wrong_removed"):
        annotated_df[list_column]=annotated_df[list_column].apply(_parse_list_cell)

    # True where both the `wrong_added` and `wrong_removed` lists are empty.
    no_wrong_change=(annotated_df["wrong_added"].apply(lambda x: x == []))&(annotated_df["wrong_removed"].apply(lambda x: x == []))
    # NOTE(review): `compare_lemmas` is not defined in the visible part of this
    # notebook; it must be defined or imported before this cell runs, otherwise
    # a fresh-kernel run fails here with NameError.
    annotated_df['automated_keep']=no_wrong_change|annotated_df.apply(compare_lemmas, axis=1)

elif modification=='AAE':
    annotated_df["proba_par"]=annotated_df["proba_par"].apply(lambda x : round(x,2))
    annotated_df["proba_ori"]=annotated_df["proba_ori"].apply(lambda x : round(x,2))
    # Kept when `label_par` is 'LABEL_1', or when `proba_par` is both lower
    # than `proba_ori` and at most 0.9.
    # NOTE(review): the meaning of 'LABEL_1' and the 0.9 threshold are not
    # documented in this notebook -- confirm against the classifier used.
    annotated_df['automated_keep']=((annotated_df["label_par"]=='LABEL_1')|((annotated_df["proba_par"]<annotated_df["proba_ori"])&(annotated_df["proba_par"]<=0.9)))

else:
    # Without this branch `automated_keep` would be missing (KeyError below) or,
    # worse, left over from an earlier run with a different configuration.
    raise ValueError(f"Unsupported modification {modification!r}: expected 'prepositions' or 'AAE'")

conf_matrix = pd.crosstab(annotated_df['keep'], annotated_df['automated_keep'], rownames=['Human'], colnames=['Automated'])
print(conf_matrix)

In [None]:
# Metrics
# The human `keep` column is the reference; `automated_keep` is the prediction.
y_true, y_pred = annotated_df['keep'], annotated_df['automated_keep']

# Evaluate the four scorers in a fixed order and unpack them by name.
precision, recall, f1, accuracy = (
    scorer(y_true, y_pred)
    for scorer in (precision_score, recall_score, f1_score, accuracy_score)
)

print(
    f"\nPrecision: {precision:.3f}",
    f"Recall:    {recall:.3f}",
    f"F1 Score:  {f1:.3f}",
    f"Accuracy:  {accuracy:.3f}",
    sep="\n",
)

In [None]:
# Columns shown for each misclassified example.
review_columns = ["original", "raw_answer", "nb_modif", "wrong_modif", "realism", "meaning"]
human_keep = annotated_df["keep"]
automated_keep = annotated_df["automated_keep"]

# Rows where `automated_keep` is set but the human `keep` is not.
print("False positives examples:")
print(annotated_df.loc[~human_keep & automated_keep, review_columns])

# Rows where the human `keep` is set but `automated_keep` is not.
print("False negatives examples:")
print(annotated_df.loc[human_keep & ~automated_keep, review_columns])