# Metrics Documentation

## Import

In [None]:
import pandas as pd

# Load the two raw annotation exports into DataFrames. Judging by the file
# names, the first holds the BioBERT-only output and the second the
# BioBERT + LLaMA output -- TODO confirm against the export step.
# Both paths are relative to the current working directory.
biobert = pd.read_csv('annotations-dpoc-biobert_groups_raw.csv')
llama = pd.read_csv('annotations-dpoc-biobert-llama_groups_raw.csv')

In [None]:
# Notebook display cell: preview the first rows of the loaded frame.
biobert.head()

## Preprocessing

In [None]:
import ast
import re

def replace_tokens_with_entities(text, tokens, annotations):
    """
    Replace tokens with their corresponding entity groups based on annotations.

    Tokens are aligned to ``text`` purely by position: each token is assumed
    to start at the next non-whitespace character after the previous token
    and to span ``len(token)`` characters. The token string itself is never
    searched for in ``text``, so ``tokens`` must be in document order.

    Args:
        text (str): Original text
        tokens (list): List of tokens, in the order they occur in ``text``
        annotations (list): List of annotation dictionaries with the keys
            'start' and 'end' (character offsets into ``text``, end
            exclusive) and 'entity_group'

    Returns:
        list: List of tokens with entities replaced by their entity groups
    """

    # Create a mapping of character positions to entity groups.
    # If two annotations overlap, the one later in the list wins.
    entity_map = {}
    for annotation in annotations:
        start = annotation['start']
        end = annotation['end']
        entity_group = annotation['entity_group']

        # Mark all character positions in this range with the entity group
        for pos in range(start, end):
            entity_map[pos] = entity_group

    text_length = len(text)

    # Track current position in the text
    current_pos = 0
    result_tokens = []

    for token in tokens:
        # Skip every kind of whitespace before the token. Checking only
        # ' \n\t' would treat '\r' (Windows line endings), non-breaking
        # spaces, etc. as part of the next token, and the resulting offset
        # error would carry over to all following tokens.
        while current_pos < text_length and text[current_pos].isspace():
            current_pos += 1

        # The token is assumed to occupy the next len(token) characters
        token_start = current_pos
        token_end = current_pos + len(token)

        # First annotated character inside the token span decides the
        # entity group; None means no annotation touches this token.
        entity_found = next(
            (
                entity_map[pos]
                for pos in range(token_start, min(token_end, text_length))
                if pos in entity_map
            ),
            None,
        )

        # If we found an entity, replace the token with the entity group
        result_tokens.append(token if entity_found is None else entity_found)

        # Move position forward
        current_pos = token_end

    return result_tokens

In [None]:
def decimal_to_bool(number):
    """Collapse a number to a 0/1 flag: 0 maps to 0, any other value to 1."""
    return 1 if number != 0 else 0

def decimal_to_binary_16(number):
    """
    Expand an integer into its list of bits, least-significant bit first.

    The binary representation is left-padded with zeros to 16 digits before
    being reversed, so the result has at least 16 entries (more when the
    value needs more than 16 bits). Intended for non-negative integers.
    """
    padded = bin(number)[2:].zfill(16)
    return [int(digit) for digit in reversed(padded)]

def binary_16_to_8_binary(bits_16):
    """
    Merge consecutive bit pairs with a logical OR.

    Entries (0, 1), (2, 3), ... are combined: the output holds 1 when either
    member of the pair equals 1, otherwise 0. A trailing unpaired entry is
    ignored, so 16 input bits yield 8 output bits.
    """
    return [
        1 if bits_16[odd] == 1 or bits_16[odd - 1] == 1 else 0
        for odd in range(1, len(bits_16), 2)
    ]

def decimal_to_binary(n):
    """
    Turn an integer id into a list of per-pair flags.

    The integer is expanded to (at least) 16 bits, least-significant first,
    and every pair of adjacent bits is OR-ed into a single flag.
    """
    bits_16 = decimal_to_binary_16(n)
    return binary_16_to_8_binary(bits_16)

In [None]:
# Convert the labels of an annotation to a binary vector.
# `cats` gives the slot of each category inside that vector.
cats = {
    'Pathophysiology': 0,
    'Etiology': 1,
    'Epidemiology': 2,
    'History': 3,
    'Physical_examination': 4,
    'Complementary_exams': 5,
    'Differential_diagnosis': 6,
    'Therapeutic_plan': 7,
}

def label2binary(labels):
    """
    Encode a '-'-separated label string as an 8-slot 0/1 vector.

    Each part of ``labels`` that is a key of ``cats`` switches on the
    matching slot; unknown parts are ignored. Note that any string is
    accepted, so a plain word that happens to equal a category name (or
    contains one between hyphens) is encoded as that category.
    """
    active_slots = {cats[part] for part in labels.split('-') if part in cats}
    return [1 if slot in active_slots else 0 for slot in range(8)]

In [None]:
# Convert the label of every token of a text to its binary vector
def preprocessClassification(tokens):
    """Return one ``label2binary`` vector per entry of ``tokens``, in order."""
    return [label2binary(token) for token in tokens]

In [None]:
# Convert the label of every token of a text to a single 0/1 flag
def preprocessAnotation(tokens):
    """
    Return, per entry of ``tokens``, 1 when its ``label2binary`` vector has
    at least one slot set and 0 otherwise.
    """
    return [int(sum(label2binary(token)) >= 1) for token in tokens]

## Input predictions

In [None]:
from tqdm import tqdm

def add_predictions(df):
    """
    Build the per-token gold and predicted label columns for every row.

    Args:
        df (pandas.DataFrame): Frame with the columns 'annotation id',
            'text', 'ner_ids', 'tokens' and 'annotations'. The last three
            hold Python literals stored as strings and are parsed with
            ``ast.literal_eval``.

    Returns:
        pandas.DataFrame: Copy of the 'annotation id' and 'text' columns
        plus four list-valued columns:
            'real annotation'                 0/1 flag per token, from ner_ids
            'real annotation by label'        flag vector per token, from ner_ids
            'prediction annotation'           0/1 flag per token, from annotations
            'prediction annotation by label'  flag vector per token, from annotations
    """

    # Select first, then copy: only the two kept columns are duplicated
    df_output = df[["annotation id", "text"]].copy()
    annotations = []
    annotations_by_label = []
    annotations_prediction = []
    annotations_prediction_by_label = []

    # Iterate the columns positionally. Looking rows up with
    # df[col][i] for i in range(len(df)) is label-based and breaks on any
    # frame whose index is not 0..n-1 (e.g. after filtering or sampling).
    rows = zip(df['text'], df['ner_ids'], df['tokens'], df['annotations'])

    for text, raw_ner_ids, raw_tokens, raw_annotations in tqdm(rows, total=len(df)):

        ner_ids = ast.literal_eval(raw_ner_ids)
        tokens = ast.literal_eval(raw_tokens)
        entity_annotations = ast.literal_eval(raw_annotations)

        # Convert ner_ids to binary (reference labels)
        annotations.append([decimal_to_bool(x) for x in ner_ids])
        annotations_by_label.append([decimal_to_binary(x) for x in ner_ids])

        # Build the prediction columns: tokens covered by an annotation are
        # swapped for their entity group, then encoded like the reference
        prediction = replace_tokens_with_entities(text, tokens, entity_annotations)
        annotations_prediction.append(preprocessAnotation(prediction))
        annotations_prediction_by_label.append(preprocessClassification(prediction))

    # Plain lists are assigned by position, independent of the frame's index
    df_output['real annotation'] = annotations
    df_output['real annotation by label'] = annotations_by_label
    df_output['prediction annotation'] = annotations_prediction
    df_output['prediction annotation by label'] = annotations_prediction_by_label

    return df_output

In [None]:
# Derive the gold / predicted label columns for both loaded frames.
biobert_output = add_predictions(biobert)
llama_output = add_predictions(llama)

In [None]:
# Notebook display cell: preview the first rows of the derived frame.
biobert_output.head()

## Metrics

In [None]:
from sklearn.metrics import jaccard_score, precision_score, recall_score, f1_score
import numpy as np

def metrics(df):
    """
    Score every row's token-level prediction against its reference labels.

    Args:
        df (pandas.DataFrame): Frame with the list-valued columns
            'real annotation' and 'prediction annotation' (0/1 per token).

    Returns:
        pandas.DataFrame: Copy of ``df`` with the columns 'Precision',
        'Recall' and 'F1'. Each cell is a two-item list
        ``[score, number_of_tokens]``, where ``number_of_tokens`` is the
        length of the row's 'real annotation' list. Rows listed in
        ``bad_data`` get ``[nan, number_of_tokens]``.
    """

    df_test = df.copy()
    precision = []
    recall = []
    f1 = []
    # Positional row numbers to leave unscored; empty by default.
    bad_data = set()

    # Walk the two columns positionally so the function also works on frames
    # whose index is not 0..n-1 (df[col][i] would be a label lookup).
    pairs = zip(df_test["real annotation"], df_test["prediction annotation"])

    for index, (y_true, y_pred) in enumerate(tqdm(pairs, total=len(df_test))):
        n_tokens = len(y_true)

        if index in bad_data:
            # Keep the [score, length] shape so consumers that read
            # value[0] do not fail on a bare NaN.
            precision.append([np.nan, n_tokens])
            recall.append([np.nan, n_tokens])
            f1.append([np.nan, n_tokens])
            continue

        precision_now = precision_score(y_true, y_pred, average='binary')
        precision.append([precision_now, n_tokens])

        recall_now = recall_score(y_true, y_pred, average='binary')
        recall.append([recall_now, n_tokens])

        f1_now = f1_score(y_true, y_pred, average='binary')
        f1.append([f1_now, n_tokens])

    df_test["Precision"] = precision
    df_test["Recall"] = recall
    df_test["F1"] = f1

    return df_test

In [None]:
# Add the per-row Precision / Recall / F1 columns to both frames
# (each name is rebound to the frame returned by metrics()).
biobert_output = metrics(biobert_output)
llama_output = metrics(llama_output)

## Evaluation

### Precision

In [None]:
# Unweighted mean of the per-row precision, reported once per model.
# Each cell of the "Precision" column is [score, number_of_tokens]; only the
# score (value[0]) is averaged, so every row counts equally.
for model_name, model_output in (("Biobert", biobert_output),
                                 ("Biobert-LLama", llama_output)):
    score = 0
    count = 0
    for value in model_output["Precision"]:
        score += value[0]
        count += 1

    # An empty frame has no mean: report NaN instead of dividing by zero
    Precision_Mean = score/count if count else float("nan")

    print(f"{model_name} - Precision_Mean: {Precision_Mean}")

### Recall

In [None]:
# Unweighted mean of the per-row recall, reported once per model.
# Each cell of the "Recall" column is [score, number_of_tokens]; only the
# score (value[0]) is averaged, so every row counts equally.
for model_name, model_output in (("Biobert", biobert_output),
                                 ("Biobert-LLama", llama_output)):
    score = 0
    count = 0
    for value in model_output["Recall"]:
        score += value[0]
        count += 1

    # An empty frame has no mean: report NaN instead of dividing by zero
    Recall_Mean = score/count if count else float("nan")

    print(f"{model_name} - Recall_Mean: {Recall_Mean}")

### F1

In [None]:
# Unweighted mean of the per-row F1, reported once per model.
# Each cell of the "F1" column is [score, number_of_tokens]; only the
# score (value[0]) is averaged, so every row counts equally.
for model_name, model_output in (("Biobert", biobert_output),
                                 ("Biobert-LLama", llama_output)):
    score = 0
    count = 0
    for value in model_output["F1"]:
        score += value[0]
        count += 1

    # An empty frame has no mean: report NaN instead of dividing by zero
    F1_Mean = score/count if count else float("nan")

    print(f"{model_name} - F1_Mean: {F1_Mean}")

## Classification evaluation

### Jaccard Index

In [None]:
from sklearn.metrics import jaccard_score

def jaccard(y_true, y_pred):
    """Return scikit-learn's micro-averaged Jaccard score of ``y_pred`` against ``y_true``."""
    score = jaccard_score(y_true, y_pred, average='micro')
    return score

# Annotation evaluation

### Jaccard Index

In [None]:
from sklearn.metrics import jaccard_score

def jaccard(y_true, y_pred):
    """Return scikit-learn's micro-averaged Jaccard score of ``y_pred`` against ``y_true``."""
    score = jaccard_score(y_true, y_pred, average='micro')
    return score

### Precision

In [None]:
from sklearn.metrics import precision_score

def precision(y_true, y_pred):
    """Return scikit-learn's binary-averaged precision of ``y_pred`` against ``y_true``."""
    score = precision_score(y_true, y_pred, average='binary')
    return score

### Recall

In [None]:
from sklearn.metrics import recall_score

def recall(y_true, y_pred):
    """Return scikit-learn's binary-averaged recall of ``y_pred`` against ``y_true``."""
    score = recall_score(y_true, y_pred, average='binary')
    return score

### F1 Score

In [None]:
from sklearn.metrics import f1_score

def f1(y_true, y_pred):
    """Return scikit-learn's binary-averaged F1 score of ``y_pred`` against ``y_true``."""
    score = f1_score(y_true, y_pred, average='binary')
    return score