In [26]:
from sklearn.metrics import precision_score, recall_score, f1_score
import pandas as pd

In [20]:
# Model predictions: rename the upper-case entity columns to the title-case names,
# tag every row with annotator 'model', and keep only the columns listed below.
df_model = (
    pd.read_csv('../data/result_model.csv')
    .rename(columns={
        "GENE_OR_GENE_PRODUCT": "Gene",
        "MEASURE": "Measure",
        "METHOD": "Method",
        "MUTATION": "Mutation",
    })
    .assign(annotator='model')
    .loc[:, ['annotator', 'pmid', 'Gene', 'Measure', 'Method', 'Mutation']]
)
print(df_model.columns)

# Double-checker annotations: the full frame stays in `df_double_checkers`;
# `df_double_checker` (singular) is the column subset whose header is printed here.
df_double_checkers = pd.read_csv('../data/double_checkers.csv')
df_double_checker = df_double_checkers.loc[
    :, ['annotator', 'text', 'Gene', 'Measure', 'Method', 'Mutation']
]
print(df_double_checker.columns)

Index(['annotator', 'pmid', 'Gene', 'Measure', 'Method', 'Mutation'], dtype='object')
Index(['annotator', 'text', 'Gene', 'Measure', 'Method', 'Mutation'], dtype='object')


In [21]:
def normalize_entities(entity_str):
    """Turn one comma-separated annotation cell into a set of entities.

    Args:
        entity_str: A comma-separated string, or a missing value (anything
            ``pd.isna`` reports as missing).

    Returns:
        The set of stripped, lower-cased, non-empty pieces; an empty set when
        the cell is missing.
    """
    if not pd.isna(entity_str):
        stripped_pieces = (piece.strip() for piece in entity_str.split(","))
        # Blank pieces (e.g. from ",," or a trailing comma) are discarded.
        return {piece.lower() for piece in stripped_pieces if piece}
    return set()

def flatten_annotations(df, cols):
    """Pool the comma-separated entities of each column into one set per column.

    Args:
        df: DataFrame whose ``cols`` hold comma-separated entity strings.
            Missing cells are treated as empty strings.
        cols: Iterable of column names to flatten.

    Returns:
        dict mapping each column name (in the order given) to the set of
        stripped, lower-cased, non-empty entities found anywhere in that column.
    """
    all_entities = {}
    for col in cols:
        # Accumulate straight into a fresh set. Building a list and then
        # replacing the dict slot with a set made the slot change type, so a
        # column name repeated in `cols` crashed on `set.extend`.
        pooled = set()
        for cell in df[col].fillna(""):
            pooled.update(
                piece.strip().lower() for piece in cell.split(",") if piece.strip()
            )
        all_entities[col] = pooled
    return all_entities

In [22]:
# Columns to compare
entity_cols = ["Mutation", "Gene", "Measure", "Method"]

# Pool the annotations of all checkers into one entity set per column
gold_entities = flatten_annotations(df_double_checkers, entity_cols)

# Pool the model's predictions the same way
model_entities = flatten_annotations(df_model, entity_cols)
# NOTE(review): both sides are pooled over every row, so an entity counts as
# correct if it appears anywhere in the gold file, not necessarily in the same
# document. Confirm this corpus-level matching is intended.

# Compute precision / recall / F1 for each entity category
results = []
for col in entity_cols:
    predicted, gold = model_entities[col], gold_entities[col]
    n_hits = len(predicted & gold)      # predicted and present in gold (TP)
    n_spurious = len(predicted - gold)  # predicted but absent from gold (FP)
    n_missed = len(gold - predicted)    # in gold but never predicted (FN)

    # Binary label vectors laid out as [TP..., FP..., FN...]; the scores depend
    # only on these counts, not on element order.
    y_true = [1] * n_hits + [0] * n_spurious + [1] * n_missed
    y_pred = [1] * (n_hits + n_spurious) + [0] * n_missed

    results.append((
        col,
        precision_score(y_true, y_pred),
        recall_score(y_true, y_pred),
        f1_score(y_true, y_pred),
    ))

In [23]:
# Tabulate the per-entity scores, then append an "Overall" row holding the
# unweighted mean of each metric column. That row has no Entity value, so the
# cell shows NaN.
score_cols = ["Precision", "Recall", "F1"]
df_results = pd.DataFrame(results, columns=["Entity"] + score_cols)
df_results.loc["Overall"] = df_results[score_cols].mean()
print(df_results)

           Entity  Precision    Recall        F1
0        Mutation   0.144578  0.240000  0.180451
1            Gene   0.235294  0.300000  0.263736
2         Measure   1.000000  0.066667  0.125000
3          Method   1.000000  0.044444  0.085106
Overall       NaN   0.594968  0.162778  0.163573
