In [None]:
import ast
from pathlib import Path

import numpy as np
import pandas as pd
import tomli
from IPython.display import display, HTML

# Custom stylesheet injected into the notebook page.
# NOTE(review): presumably meant to lay the outputs of one cell out in a row
# (side by side); confirm the `.output` selector still matches in the front end used.
CSS = """
.output {
    flex-direction: row;
}
"""

# Must stay the last expression of the cell so the <style> element is rendered.
HTML(f"<style>{CSS}</style>")

In [None]:
# The markings export lives in <parent of the working directory>/data/processed.
markings_location = Path().resolve().parent.joinpath(
    "data", "processed", "Evaluatie markings week 1.csv"
)

# Load the annotations and drop every row whose comment is exactly
# "test" or "final test".
annotatie_data = pd.read_csv(markings_location).loc[
    lambda frame: ~frame.comments.isin(["test", "final test"])
]

In [None]:
# Parse the analysis configuration (TOML) found in the current working directory.
# Later cells read its "test_users" and "students" tables.
with (Path().resolve() / "analysis_config.toml").open("rb") as config_file:
    conf_dict = tomli.load(config_file)

In [None]:
# E-mail addresses of all configured test users, plus the "No user" placeholder
# value; rows from these users are excluded from the analysis.
test_users = [entry["email"] for entry in conf_dict["test_users"].values()]
test_users.append("No user")

In [None]:
# Keep only annotations whose user is not one of the test users.
annotatie_data = annotatie_data[~annotatie_data["user"].isin(test_users)]

In [None]:
# The first two entries of the "students" config table are the two annotators;
# keep both their config key and their e-mail address.
student_1 = list(conf_dict["students"])[0]
student_2 = list(conf_dict["students"])[1]
student_1_mail = conf_dict["students"][student_1]["email"]
student_2_mail = conf_dict["students"][student_2]["email"]

### Wat eerste observaties:
- Soms toch overlappende observaties: GPT patient 1 NICU voor Anna en ORG patient 1 IC voor Zoë.

In [None]:
# Truncate long cell contents to 50 characters for this overview.
pd.set_option("display.max_colwidth", 50)
# Show all annotations ordered by patient and then by annotator.
annotatie_data.sort_values(by=["patientid", "user"])

## Some statistics for our fantastic annotators

In [None]:
# Function for getting overview of omissions/hallucinations.
def get_annotation_table(
    data,
    patient_id,
    annotator,
    letter_type=["ORG letter", "GPT letter"],
    evaluation_type=[
        "highlighted_missings",
        "highlighted_halucinations",
        "highlighted_trivial_information",
    ],
):
    """Build a left-aligned, one-column table of an annotator's highlights.

    Parameters
    ----------
    data : pandas.DataFrame
        Annotation data; the columns ``patientid``, ``letter_evaluated`` and
        ``user`` are read, together with every column in ``evaluation_type``.
    patient_id
        Value that ``data.patientid`` must equal.
    annotator : dict
        Single-entry mapping ``{display name: e-mail}``. The name becomes the
        column header, the e-mail is matched against ``data.user``.
    letter_type : str or sequence of str
        Letter type(s) accepted in ``data.letter_evaluated``.
    evaluation_type : str or sequence of str
        Column(s) holding the highlight strings. With several columns the
        highlights are concatenated in the given order.

    Returns
    -------
    pandas.io.formats.style.Styler
        Styled table with the highlights of the first matching row.

    Raises
    ------
    IndexError
        If no row matches the patient, letter type and annotator.
    """
    annotator_name, annotator_email = next(iter(annotator.items()))

    # Accept a single string as well as a list for both selectors, so the
    # list-valued defaults work just like the explicit single-string calls.
    letter_types = [letter_type] if isinstance(letter_type, str) else list(letter_type)
    evaluation_types = (
        [evaluation_type] if isinstance(evaluation_type, str) else list(evaluation_type)
    )

    matches = data.loc[
        (data.patientid == patient_id)
        & data.letter_evaluated.isin(letter_types)
        & (data.user == annotator_email)
    ]
    if matches.empty:
        raise IndexError(
            f"No annotation found for patient {patient_id!r}, "
            f"letter type {letter_types!r} and annotator {annotator_name!r}"
        )

    # Only the first matching row is shown, even if there are several.
    first_match = matches.iloc[0]
    highlights_list = []
    for column in evaluation_types:
        highlights_list.extend(get_annotation_list(first_match[column]))

    highlights_df = pd.DataFrame(
        {annotator_name: highlights_list}
    ).style.set_properties(**{"text-align": "left"})
    return highlights_df.set_table_styles(
        [dict(selector="th", props=[("text-align", "left")])]
    )


def get_annotation_list(string):
    """Turn the stored text form of a highlight list into a list of strings.

    The value is first parsed as a Python list literal (e.g. ``"['a', 'b']"``),
    which keeps commas and apostrophes inside a highlight intact. If it is not
    a valid list/tuple literal, the text is split on commas after removing
    quotes and square brackets.

    Parameters
    ----------
    string : str
        Text form of the highlights. ``None`` and NaN (an empty CSV cell) are
        treated as "no highlights".

    Returns
    -------
    list of str
        The stripped highlights. Empty entries are dropped, so ``"[]"`` gives
        ``[]`` and counts as zero highlights.
    """
    # NaN is the only float that is not equal to itself.
    if string is None or (isinstance(string, float) and string != string):
        return []

    try:
        parsed = ast.literal_eval(string.strip())
    except (ValueError, SyntaxError, AttributeError):
        parsed = None

    if isinstance(parsed, (list, tuple)):
        items = [str(item) for item in parsed]
    else:
        # Not a list literal: fall back to plain comma splitting.
        for ch in ("'", "[", "]"):
            string = string.replace(ch, "")
        items = string.split(",")

    return [item.strip() for item in items if item.strip()]


# Effectively disable column truncation so complete highlights are shown.
pd.set_option("display.max_colwidth", 10000)

In [None]:
def _highlight_counts(user_mail, column):
    """Count the highlights in `column` for every annotation made by `user_mail`."""
    return annotatie_data.loc[annotatie_data.user == user_mail, column].map(
        lambda string: len(get_annotation_list(string))
    )


# Highlight counts per annotation: om = omissions, hal = hallucinations,
# triv = trivial information; A = student 1, Z = student 2.
data_om_A = _highlight_counts(student_1_mail, "highlighted_missings")
data_om_Z = _highlight_counts(student_2_mail, "highlighted_missings")
data_hal_A = _highlight_counts(student_1_mail, "highlighted_halucinations")
data_hal_Z = _highlight_counts(student_2_mail, "highlighted_halucinations")
data_triv_A = _highlight_counts(student_1_mail, "highlighted_trivial_information")
data_triv_Z = _highlight_counts(student_2_mail, "highlighted_trivial_information")

# Mean and median number of highlights per annotation for each annotator.
stats = pd.DataFrame(
    {
        "mean omission": [data_om_A.mean(), data_om_Z.mean()],
        # The omission median must use the omission counts (data_om_*),
        # not the hallucination counts.
        "median omission": [data_om_A.median(), data_om_Z.median()],
        "mean hallucination": [data_hal_A.mean(), data_hal_Z.mean()],
        "median hallucination": [data_hal_A.median(), data_hal_Z.median()],
        "mean trivial facts": [data_triv_A.mean(), data_triv_Z.mean()],
        "median trivial facts": [data_triv_A.median(), data_triv_Z.median()],
    }
)
# One row per statistic, one column per annotator.
stats = stats.transpose()
# NOTE(review): the labels assume student 1 is Anna and student 2 is Zoë;
# confirm against the order of the "students" table in the config.
stats.columns = ["Anna", "Zoë"]
display(stats)

In [None]:
# Show the number of highlighted omissions per annotation,
# first for student 1 and then for student 2.
for annotator_mail in (student_1_mail, student_2_mail):
    omission_highlights = annotatie_data.loc[
        annotatie_data.user == annotator_mail, "highlighted_missings"
    ]
    display(omission_highlights.map(lambda string: len(get_annotation_list(string))))

## ORIGINAL PATIENT 2 NICU LETTER 

In [None]:
# Omissions highlighted by each annotator in the original (ORG) letter
# of NICU patient 2.
A_omission, Z_omission = (
    get_annotation_table(
        data=annotatie_data,
        patient_id="patient_2_nicu",
        annotator=annotator,
        letter_type="ORG letter",
        evaluation_type="highlighted_missings",
    )
    for annotator in ({"Anna": student_1_mail}, {"Zoë": student_2_mail})
)

display(A_omission, Z_omission)

In [None]:
# Hallucinations highlighted by each annotator in the original (ORG) letter
# of NICU patient 2.
A_hallucination, Z_hallucination = (
    get_annotation_table(
        data=annotatie_data,
        patient_id="patient_2_nicu",
        annotator=annotator,
        letter_type="ORG letter",
        evaluation_type="highlighted_halucinations",
    )
    for annotator in ({"Anna": student_1_mail}, {"Zoë": student_2_mail})
)

display(A_hallucination, Z_hallucination)

In [None]:
# Trivial information highlighted by each annotator in the original (ORG)
# letter of NICU patient 2.
A_trivial, Z_trivial = (
    get_annotation_table(
        data=annotatie_data,
        patient_id="patient_2_nicu",
        annotator=annotator,
        letter_type="ORG letter",
        evaluation_type="highlighted_trivial_information",
    )
    for annotator in ({"Anna": student_1_mail}, {"Zoë": student_2_mail})
)

display(A_trivial, Z_trivial)

## GPT PATIENT 2 NICU LETTER

In [None]:
# Omissions highlighted by each annotator in the GPT letter of NICU patient 2.
A_omission, Z_omission = (
    get_annotation_table(
        data=annotatie_data,
        patient_id="patient_2_nicu",
        annotator=annotator,
        letter_type="GPT letter",
        evaluation_type="highlighted_missings",
    )
    for annotator in ({"Anna": student_1_mail}, {"Zoë": student_2_mail})
)

display(A_omission, Z_omission)

In [None]:
# Hallucinations highlighted by each annotator in the GPT letter
# of NICU patient 2.
A_hallucination, Z_hallucination = (
    get_annotation_table(
        data=annotatie_data,
        patient_id="patient_2_nicu",
        annotator=annotator,
        letter_type="GPT letter",
        evaluation_type="highlighted_halucinations",
    )
    for annotator in ({"Anna": student_1_mail}, {"Zoë": student_2_mail})
)

display(A_hallucination, Z_hallucination)

In [None]:
# Trivial information highlighted by each annotator in the GPT letter
# of NICU patient 2.
A_trivial, Z_trivial = (
    get_annotation_table(
        data=annotatie_data,
        patient_id="patient_2_nicu",
        annotator=annotator,
        letter_type="GPT letter",
        evaluation_type="highlighted_trivial_information",
    )
    for annotator in ({"Anna": student_1_mail}, {"Zoë": student_2_mail})
)

display(A_trivial, Z_trivial)

## ORIGINAL PATIENT 3 NICU LETTER

In [None]:
# Omissions highlighted by each annotator in the original (ORG) letter
# of NICU patient 3.
A_omission, Z_omission = (
    get_annotation_table(
        data=annotatie_data,
        patient_id="patient_3_nicu",
        annotator=annotator,
        letter_type="ORG letter",
        evaluation_type="highlighted_missings",
    )
    for annotator in ({"Anna": student_1_mail}, {"Zoë": student_2_mail})
)

display(A_omission, Z_omission)

In [None]:
# Hallucinations highlighted by each annotator in the original (ORG) letter
# of NICU patient 3.
A_hallucination, Z_hallucination = (
    get_annotation_table(
        data=annotatie_data,
        patient_id="patient_3_nicu",
        annotator=annotator,
        letter_type="ORG letter",
        evaluation_type="highlighted_halucinations",
    )
    for annotator in ({"Anna": student_1_mail}, {"Zoë": student_2_mail})
)

display(A_hallucination, Z_hallucination)

In [None]:
# Trivial information highlighted by each annotator in the original (ORG)
# letter of NICU patient 3.
A_trivial, Z_trivial = (
    get_annotation_table(
        data=annotatie_data,
        patient_id="patient_3_nicu",
        annotator=annotator,
        letter_type="ORG letter",
        evaluation_type="highlighted_trivial_information",
    )
    for annotator in ({"Anna": student_1_mail}, {"Zoë": student_2_mail})
)

display(A_trivial, Z_trivial)