# In [None]:
import pandas as pd
import os
import re

# === CONFIGURATION ===
# Source file; it is read below without a header row.
INPUT_CSV = "1 row per note.csv"

# CHANGE THIS LINE: path of the CSV that the review decisions are saved to.
REVIEW_OUTPUT_CSV = "andrew_review.csv"

# Column names assigned, in order, to the header-less input file.
ALL_COLUMNS = [
    "TOTAL_FLAGS",
    "PAT_ENC_CSN_ID",
    "NOTE_ID",
    "IP_NOTE_TYPE_C",
    "CRT_DTTM",
    "NOTE_TEXT",
]

# Every input column except the creation timestamp is kept for the review.
RELEVANT_COLUMNS = [name for name in ALL_COLUMNS if name != "CRT_DTTM"]

# Phrases to emphasise when a note is printed.
HIGHLIGHT_TERMS = ["PHRASE 1", "PHRASE 2", "PHRASE 3"]

# === Load data ===
df = pd.read_csv(INPUT_CSV, header=None)
df.columns = ALL_COLUMNS

# UNCOMMENT TO JUST SEE PARTICULAR FLAGS
#df = df[df["TOTAL_FLAGS"]>6].copy()

# UNCOMMENT TO ONLY SEE NOTES WITH PARTICULAR WORDS IN THEM
#note_mask = df["NOTE_TEXT"].str.contains(r'\b(nangs|nitrous)\b', flags=re.IGNORECASE, na=False)
#df = df[note_mask]

# Highest flag counts first, restricted to the columns used during review.
df = df.sort_values(by="TOTAL_FLAGS", ascending=False)[RELEVANT_COLUMNS]

# In [None]:
# Load the manual review file if one exists, otherwise start an empty one
# with the three columns that are written out after every decision.
reviewed_df = (
    pd.read_csv(REVIEW_OUTPUT_CSV)
    if os.path.exists(REVIEW_OUTPUT_CSV)
    else pd.DataFrame(columns=["PAT_ENC_CSN_ID", "TOTAL_FLAGS", "MANUAL_REVIEW"])
)

# CSNs that already have a saved decision (an empty set for a fresh file).
reviewed_csns = set(reviewed_df["PAT_ENC_CSN_ID"])

# Highlighting function
def highlight_text(text, terms=None):
    """Return *text* with each highlight term wrapped in bold-yellow ANSI codes.

    Terms are treated as literal phrases (regex metacharacters are escaped)
    and matched case-insensitively as whole words/phrases. All terms are
    applied in a single pass, so one term can never match inside the escape
    sequence inserted for another, and a longer phrase takes precedence over
    a shorter term it contains.

    Args:
        text: Note text to decorate. A missing value (``None`` or a float
            NaN, e.g. from an empty CSV cell) yields ``""``; any other
            non-string value is converted with ``str()`` and returned
            without highlighting.
        terms: Iterable of phrases to highlight. Defaults to the
            module-level ``HIGHLIGHT_TERMS``.

    Returns:
        The decorated string, or the text unchanged when there are no
        non-empty terms.
    """
    if terms is None:
        terms = HIGHLIGHT_TERMS

    if not isinstance(text, str):
        # NaN is the only float value that is not equal to itself.
        is_missing = text is None or (isinstance(text, float) and text != text)
        return "" if is_missing else str(text)

    # Longest first: regex alternation takes the first alternative that
    # matches, so "nitrous oxide" must be tried before "nitrous".
    ordered_terms = sorted((t for t in terms if t), key=len, reverse=True)
    if not ordered_terms:
        return text

    alternation = "|".join(re.escape(t) for t in ordered_terms)
    # (?<!\w) / (?!\w) behave like \b for terms that start and end with a
    # word character, but also work for terms edged by punctuation ("c++").
    pattern = re.compile(
        r"(?<!\w)(?:" + alternation + r")(?!\w)", flags=re.IGNORECASE
    )
    return pattern.sub(lambda m: "\033[1;33m" + m.group(0) + "\033[0m", text)

# === Review Loop ===
# Every CSN without a saved decision is reviewed note by note and receives a
# single overall label:
#   "1" - a note was labelled positive (the CSN's remaining notes are skipped)
#   "?" - no positive note, but at least one note was labelled unsure
#   "0" - every note was labelled negative
#
# sort=False matters: by default groupby() iterates groups in ascending key
# order, which would throw away the TOTAL_FLAGS ordering and present CSNs by
# ID instead. With sort=False the groups come out in order of first
# appearance, i.e. the CSN holding the highest-flagged note comes first.
csn_groups = df.sort_values(by="TOTAL_FLAGS", ascending=False).groupby(
    "PAT_ENC_CSN_ID", sort=False
)

for csn, group in csn_groups:
    if csn in reviewed_csns:
        continue  # Already has a saved decision

    decision = None
    has_question = False

    # Prioritise non-discharge notes first (IP_NOTE_TYPE_C != 5)
    is_discharge = group["IP_NOTE_TYPE_C"] == 5
    combined_notes = pd.concat(
        [group[~is_discharge], group[is_discharge]], ignore_index=True
    )

    for _, row in combined_notes.iterrows():
        print(f"\n--- NOTE ID: {row['NOTE_ID']} | TOTAL_FLAGS: {row['TOTAL_FLAGS']} | TYPE: {row['IP_NOTE_TYPE_C']} ---")
        print(highlight_text(row['NOTE_TEXT']))

        # Keep asking until one of the three accepted labels is entered.
        while True:
            user_input = input("Label this note (1 = positive, 0 = negative, ? = unsure): ").strip()
            if user_input in ("1", "0", "?"):
                break
            print("Invalid input. Please type 1, 0, or ?.")

        if user_input == "1":
            decision = "1"
            break  # Stop reviewing this CSN
        if user_input == "?":
            has_question = True
        # If "0", continue with the next note

    # Finalise decision for this CSN when no note was labelled positive
    if decision is None:
        decision = "?" if has_question else "0"

    # Rows keep their descending TOTAL_FLAGS order within a group, so the
    # first row carries the group's highest flag count.
    total_flags = group["TOTAL_FLAGS"].iloc[0]
    reviewed_df = pd.concat([reviewed_df, pd.DataFrame([{
        "PAT_ENC_CSN_ID": csn,
        "TOTAL_FLAGS": total_flags,
        "MANUAL_REVIEW": decision
    }])], ignore_index=True)

    # Save progress after each CSN so an interrupted session can be resumed
    reviewed_df.to_csv(REVIEW_OUTPUT_CSV, index=False)
    print(f"✓ Saved CSN {csn} as {decision}")

print("✅ All done!")