# Load Data, Sanity Checks

In [1]:
import pandas as pd
import numpy as np

In [2]:
!ls

Patient 2 [label YES, NO, MISSING] - GPT-Labeled-LLM_Note (Bolanle).csv
Patient 2 [label YES, NO, MISSING] - GPT-Labled-Transcript (Waris).csv
Patient2_Scoring.ipynb


In [3]:
labelled_transcript = pd.read_csv("Patient 2 [label YES, NO, MISSING] - GPT-Labled-Transcript (Waris).csv")
labelled_note = pd.read_csv("Patient 2 [label YES, NO, MISSING] - GPT-Labeled-LLM_Note (Bolanle).csv")

In [4]:
labelled_transcript.head()

Unnamed: 0,Timestamp,Script,Placenta Previa,Placenta Accreta Spectrum or concern,Multiple Gestation,Polyhydramnios,Large for Gestational Age,Fibroids,Preeclampsia,HELLP,Anticoagulation use,Baseline Laboratory Values - Hemoglobin/Hematocrit,Baseline Laboratory Values - Platelet Count,"Baseline Laboratory Values - Coagulopathy (PTT, PT/INR)"
0,[00:00.000 --> 00:02.000,Hello.,,,,,,,,,,,,
1,[00:02.000 --> 00:04.000,"Hi, good morning.",,,,,,,,,,,,
2,[00:04.000 --> 00:07.000,This is Dr. Smith from the Bergenman Women's ...,,,,,,,,,,,,
3,[00:07.000 --> 00:08.000,Dr. Canis-Physion team.,,,,,,,,,,,,
4,[00:08.000 --> 00:11.000,I'm looking for Ms. Mary Blank.,,,,,,,,,,,,


In [5]:
labelled_note.head()

Unnamed: 0,Sentences,Placenta Previa,Placenta Accreta Spectrum or concern,Multiple Gestation,Polyhydramnios,Large for Gestational Age,Fibroids,Preeclampsia,HELLP,Anticoagulation use,Baseline Laboratory Values - Hemoglobin/Hematocrit,Baseline Laboratory Values - Platelet Count,"Baseline Laboratory Values - Coagulopathy (PTT, PT/INR)"
0,High-Risk Anesthesia Consult Note,,,,,,,,,,,,
1,Patient Name: Jane Blank,,,,,,,,,,,,
2,MRN: 000000001,,,,,,,,,,,,
3,Age: 35,,,,,,,,,,,,
4,Gravida/Para: G4P3003,,,,,,,,,,,,


In [6]:
# get rid of whitespace around text

labelled_transcript.columns = labelled_transcript.columns.str.strip()
labelled_transcript = labelled_transcript.applymap(lambda x: x.strip() if isinstance(x, str) else x)

labelled_note.columns = labelled_note.columns.str.strip()
labelled_note = labelled_note.applymap(lambda x: x.strip() if isinstance(x, str) else x)


  labelled_transcript = labelled_transcript.applymap(lambda x: x.strip() if isinstance(x, str) else x)
  labelled_note = labelled_note.applymap(lambda x: x.strip() if isinstance(x, str) else x)


In [7]:
(set(labelled_transcript.columns) - set(["Timestamp", "Script"])) == (set(labelled_note.columns) - set(["Sentences"]))

True

# Per feature comparison

In [22]:
def collectSentences(df, ref, feature_i, value):
    # Step 1: Get rows where feature_i equals the target value
    if value == 'N':
        match_values = {'negative', 'Negative', 'n', 'N'}
    elif value == 'P':
        match_values = {'positive', 'Positive', 'p', 'P'}
    else:
        match_values = {value}

    # Filter rows where feature_i is in the set of match_values
    matches = df[df[feature_i].isin(match_values)]

    # Step 2: Create list of (index, sentence)
    result = [(idx, row[ref]) for idx, row in matches.iterrows()]

    # Step 3: Merge adjacent entries
    merged = []
    start_idx, current_idx, current_text = result[0][0], result[0][0], result[0][1]

    for next_idx, next_text in result[1:]:
        if next_idx == current_idx + 1:
            # Continue current group
            current_text += " " + next_text
            current_idx = next_idx
        else:
            # End current group and start new
            merged.append(((start_idx, current_idx), current_text))
            start_idx = current_idx = next_idx
            current_text = next_text

    # Add final group
    merged.append(((start_idx, current_idx), current_text))

    return merged


In [39]:
import emoji 

# Variables for completeness and curation scores

features_in_transcript = 0
correct_features_in_note = 0

features_not_in_transcript = 0
correct_features_not_in_note = 0

# Iterate through features
for feature_i in (set(labelled_transcript.columns) - set(["Timestamp", "Script"])):
    print("\n<<<<<<\n")
    # value counts
    vc_transcript = labelled_transcript[feature_i].value_counts().to_dict() if labelled_transcript[feature_i].notna().any() else {'M' : -1}
    vc_note = labelled_note[feature_i].value_counts().to_dict() if labelled_note[feature_i].notna().any() else {'M' : -1}

    # check that each risk factor only has one label
    assert len(vc_transcript.keys())==1 and len(vc_note.keys())==1, f"Labels are not size 1 {vc_transcript.keys()} {vc_note.keys()}"

    # check that values are valid
    valid_keys = {"Positive", "Negative", "M"}
    assert set(vc_transcript.keys()).issubset(valid_keys), f"Invalid keys found!"
    assert set(vc_note.keys()).issubset(valid_keys), "Invalid keys found!"
    
    # map keys to one-letter
    vc_transcript = {("P" if k == "Positive" else "N" if k == "Negative" else k): v for k, v in vc_transcript.items()}
    vc_note = {("P" if k == "Positive" else "N" if k == "Negative" else k): v for k, v in vc_note.items()}

    # collect counts for completeness and curation

    if 'M' in vc_transcript:
        features_not_in_transcript += 1

        if 'M' in vc_note:
            correct_features_not_in_note += 1
        else:
            pass
            # print("WRONG1", feature_i, vc_transcript, vc_note)
    else:
        features_in_transcript += 1

        if 'M' not in vc_note:
            correct_features_in_note += 1
        else:
            pass
             # print("WRONG2", feature_i, vc_transcript, vc_note)
    
    # If both are missing, there are no problems
    if list(vc_transcript.keys())==['M'] and list(vc_note.keys())==['M']:
        print(emoji.emojize(':thumbs_up:'), f"{feature_i} is M in both transcript and note")
        continue 

    # compare (for loop just for ease of implementation)
    for key_transcript in vc_transcript:
        for key_note in vc_note:
            if key_transcript == key_note: # no problems
                print(emoji.emojize(':thumbs_up:'), f"{feature_i} is {key_transcript} in both transcript and note")
            elif (key_transcript, key_note) in [
                ('P', 'N'),
                ('P', 'M'), 
                ('N', 'P'),
                ('N', 'M')
            ]: # knowledge gap error
                print(f"{feature_i} has a knowledge gap error")

                print(f"The transcript says that the presence of {feature_i} is {'Positive' if key_transcript else 'Negative'}")
                print(collectSentences(labelled_transcript, 'Script', feature_i, key_transcript))

                if key_note == 'M':
                    print(f"The note does not mention the presence of {feature_i}")
                else:
                    print(f"The note says that the presence of {feature_i} is {'Positive' if key_transcript else 'Negative'}")
                    print(collectSentences(labelled_note, 'Sentences', feature_i, key_note))
                    
            elif (key_transcript, key_note) in [
                ('M', 'P'),
                ('M', 'N')
            ]: # hallucination error
                print(f"{feature_i} has a hallucination error")

                print(f"The transcript does not mention the presence of {feature_i}")

                print(f"The note says that the presence of {feature_i} is {'Positive' if key_transcript else 'Negative'}")
                print(collectSentences(labelled_note, 'Sentences', feature_i, key_note))
                
            else: # should never reach this point
                print(emoji.emojize(':angry_face_with_horns:'), "PROBLEM IN CODE")


<<<<<<

👍 Baseline Laboratory Values - Coagulopathy (PTT, PT/INR) is M in both transcript and note

<<<<<<

👍 Placenta Previa is P in both transcript and note

<<<<<<

👍 Polyhydramnios is P in both transcript and note

<<<<<<

👍 Baseline Laboratory Values - Platelet Count is P in both transcript and note

<<<<<<

Placenta Accreta Spectrum or concern has a hallucination error
The transcript does not mention the presence of Placenta Accreta Spectrum or concern
The note says that the presence of Placenta Accreta Spectrum or concern is Positive
[((10, 10), 'Jane Blank is a 35-year-old G4P3003 at 33w3d with a history of one prior vaginal delivery followed by two cesarean deliveries, the most recent of which was complicated by postpartum hemorrhage (estimated blood loss 1.7 L), requiring uterotonic medications but no transfusion. She is currently scheduled for a repeat cesarean section due to placenta previa, as identified on prenatal ultrasound. Imaging does not show evidence of placenta a

In [40]:
print("Completeness Score/Sensitivity:", correct_features_in_note/features_in_transcript)
print("Curation Score/Specificity:", correct_features_not_in_note/features_not_in_transcript)


Completeness Score/Sensitivity: 0.75
Curation Score/Specificity: 0.5
