In [None]:
from openai import AzureOpenAI

import pandas as pd
import numpy as np
import os, json, spacy
import IRAEUtils

from sklearn.metrics import classification_report

In [None]:
# Enable IPython's autoreload extension in mode 2 so imported modules are
# re-imported before each cell executes.
%load_ext autoreload
%autoreload 2
%reload_ext autoreload

# Do not truncate DataFrame output: show every column and the full cell content.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

## Config parameters

In [None]:
# Azure OpenAI deployment under evaluation (previous deployments kept for reference).
#GPT_DEPLOYMENT = "OAI05-GPT35Turbo16-0613_061823"
#GPT_DEPLOYMENT = "V04-GPT4Turbo-2024-04-09"
GPT_DEPLOYMENT = "V05-GPT4o"

# Fail early with a readable message; concatenating None with a str would
# otherwise raise an opaque "unsupported operand type(s)" TypeError.
_vu_proj_path = os.getenv("VU_PROJ_PATH")
if _vu_proj_path is None:
    raise EnvironmentError("Environment variable VU_PROJ_PATH is not set")

project_path = _vu_proj_path + "immunotoxicity/"
notes_path = project_path + "in/data4llm/"

# columns: FileName (with note content), all irAE types in the datasets
# rows: irAE annotations for each note
annotations_path = project_path  +"in/data4llm/Map.File-irAELabels.csv"

# synsets associated with each irAE type
# (project_path already ends with "/", so no leading slash is needed here)
irae_synsets_path = project_path + "map_irae_prompt_detail.csv"
# output folder for the note-level evaluation reports of this deployment
path_eval = f"{project_path}out/llm/eval-note-level/{GPT_DEPLOYMENT}/"

# irAE labels (columns of the annotation table) that are sent to the LLM and evaluated
filter_list_irae_full = ['Neuropathy', 'Hypothyroid', 'Myasthenia gravis (MG)', 'Rash', 'Colitis', 'Adrenal insufficiency', 'Hepatitis', 'Arthralgia', 'Duodenitis', 'Pancreatitis', 'Hypophysitis', 'Mucositis', 'Arthritis', 'Pneumonitis', 'Joint pain', 'Fever', 'Myalgia']
list_fname_irae_full = ['FileName'] + filter_list_irae_full

# Credentials and endpoint are read from the environment, never hard-coded.
llm = AzureOpenAI(
  api_key = os.getenv("AZURE_OPENAI05_API_KEY"),
  api_version =  "2024-07-01-preview",
  azure_endpoint = os.getenv("AZURE_OPENAI05_ENDPOINT")
)

# used for light processing of LLM outputs
nlp = spacy.load('en_core_web_sm')

## Load resources: maps & manual annotations

In [None]:
## maps: IRAE labels: full | norm -> large categories
##
dict_map_full2norm = IRAEUtils.read2cols_2dict(f"{project_path}in/data4llm/IRAE.labels.reverse.csv", 1, 2, ",", True)
dict_map_norm2large = IRAEUtils.read2cols_2dict(f"{project_path}out/llm/eval-patient-level/map-specific2generic/IRAE.map.refined-large.03.edits.csv", 0, 1, ",", True)

# Compose the two maps (full -> norm -> large); the 'None' label has no large category and is skipped.
dict_map_full2large = {
    label_full: dict_map_norm2large[label_norm]
    for label_full, label_norm in dict_map_full2norm.items()
    if label_full != 'None'
}

# Distinct large categories, in alphabetical order.
list_coll_irae_large = IRAEUtils.read1col_2list_skip1(f"{project_path}out/llm/eval-patient-level/map-specific2generic/IRAE.map.refined-large.03.edits.csv", 1, ",")
set_irae_large = set(list_coll_irae_large)
sorted_list_irae_large = sorted(set_irae_large)

# [2] load df[notes & irAE annotations]
#
# Parse the annotation table once with pandas and take the file names from it,
# rather than splitting raw lines on ',' (which mishandles quoted fields,
# blank lines and trailing newlines) and then reading the same file again.
df_labels = pd.read_csv(annotations_path)

# Read each note file once, even if its name appears on several annotation rows,
# so that the merge below cannot multiply rows.
notelist = []
for file_name in df_labels["FileName"].drop_duplicates():
    #print(os.path.join(notes_path, file_name))
    with open(os.path.join(notes_path, file_name), 'r') as datafile:
        notelist.append({"FileName": file_name, "text": datafile.read()})
# Explicit columns keep the merge key available even when there are no notes.
df_notes = pd.DataFrame(notelist, columns=["FileName", "text"])

# One row per annotated note: label columns plus the note text.
df_gold_full = pd.merge(df_labels, df_notes, on='FileName')
# Small subset for quick dry runs.
df10 = df_gold_full.head(10)

#display(df_gold_full)
#display(df_gold_full[list_fname_irae_full])
#display(df_gold_full[filter_list_irae_full])
#print(df_gold_full[filter_list_irae_full].sum(axis=0))
#print(df10[filter_list_irae_full].sum(axis=0))
print(f"Number of notes: {len(df_gold_full)}")

# [3] load irAE synset map
#
list_irae_synsets =  IRAEUtils.read2cols_2list_all(irae_synsets_path, 0, 1, "|")

# irAE label -> synset text; a later entry for the same label overrides an earlier one
dict_irae_synsets = {entry[0]: entry[1] for entry in list_irae_synsets}
#print(dict_irae_synsets)

# [4] build the list of binary questions in json format that will be included into the prompt
#
ici_list = "atezolizumab (tecentriq, atezo), avelumab (bavencio), durvalumab (imfinzi), ipilimumab (yervoy), nivolumab (opdivo, nivo), pembrolizumab (keytruda, pembro)"

In [None]:
## irAE counts - note level
# Every column of the gold table that is not an identifier or the note text is an irAE label.
list_all_irae_full = [irae_label for irae_label in df_gold_full.columns if irae_label not in ['FileName', 'text', 'GRID']]
#df_gold_full['GRID'] = df_gold_full['FileName'].apply(lambda x: x.split('.')[3])

# Sum every label column once and reuse the result for the export, the display and the filter.
irae_note_counts = df_gold_full[list_all_irae_full].sum()

# project_path already ends with "/"; create the folder so the export cannot fail on a fresh checkout.
counts_dir = project_path + "out/llm/notes/batches/"
os.makedirs(counts_dir, exist_ok=True)
irae_note_counts.to_csv(counts_dir + "IRAE.counts.note-level.cohort-note-subset.csv")

display(irae_note_counts)

# Labels with at least one positive note.
filter2_list_irae_full = [irae_full for irae_full in list_all_irae_full if irae_note_counts[irae_full] > 0]


print(f'Total irAE annotated notes: {len(df_gold_full)}')
print(list_all_irae_full)
# The two sorted lists are printed one after the other so they can be compared by eye.
print(sorted(filter2_list_irae_full))
print(sorted(filter_list_irae_full))

#display(df_gold_full[['GRID'] + list_fname_irae_full])
#display([list_fname_irae_full])
#display(df_gold_full[list_fname_irae_full])

## Convert full to large irAEs

In [None]:
## init row dictionaries for converting specific to large irAE categs
##
def init_dict_large_irae(list_irae_large) :
    """Return a fresh dict that maps every label in ``list_irae_large`` to 0.

    Key order follows the input order; a repeated label yields a single key.
    """
    return dict.fromkeys(list_irae_large, 0)

## convert specific to large irAE patient dataframe
##
def convert_specific_large(df_irae_full, dict_map_full2large, list_irae_full, list_irae_large) :
    """Collapse binary specific-irAE columns into binary large-category columns.

    Parameters
    ----------
    df_irae_full : DataFrame holding one column per label in ``list_irae_full``.
    dict_map_full2large : dict mapping each specific label to its large category.
    list_irae_full : specific labels (columns of ``df_irae_full``) to read.
    list_irae_large : large categories; these are exactly the output columns.

    Returns
    -------
    DataFrame with one row per input row (fresh 0..n-1 index) and one column per
    entry of ``list_irae_large``; a cell is 1 when any specific label mapped to
    that category equals 1 in the row, otherwise 0.

    Raises
    ------
    KeyError if a positive specific label is missing from ``dict_map_full2large``.
    """
    # Collect plain dicts and build the frame once: concatenating row by row is
    # quadratic and starts from an empty frame.
    records = []
    for row_flags in df_irae_full[list_irae_full].to_numpy():
        dict_row = dict.fromkeys(list_irae_large, 0)
        for irae_full, flag in zip(list_irae_full, row_flags):
            if flag == 1:
                irae_large = dict_map_full2large[irae_full]
                # Categories outside list_irae_large are not output columns; ignore
                # them instead of silently creating an extra column.
                if irae_large in dict_row:
                    dict_row[irae_large] = 1
        records.append(dict_row)

    return pd.DataFrame(records, columns=list_irae_large)

## Derive the gold large-category labels from the gold full (specific) labels
##
df_gold_large = convert_specific_large(df_gold_full[filter_list_irae_full], dict_map_full2large, filter_list_irae_full, sorted_list_irae_large)
display(df_gold_large)

## Build irAE large filter list: keep only the large categories that have
## at least one positive gold note
##
filter_list_irae_large = [
    irae_large
    for irae_large in sorted_list_irae_large
    if df_gold_large[irae_large].sum() > 0
]

print(f"sorted_list_irae_large:{len(sorted_list_irae_large)} -- filter_list_irae_large:{len(filter_list_irae_large)}")
# Large categories dropped because no gold note carries them.
print(set(sorted_list_irae_large) - set(filter_list_irae_large))

## IRAE zero-shot prompt

In [None]:
def get_irae_list_json(irae_list):
    """Render the JSON-like answer template that is embedded in the system prompt.

    One entry is emitted per label in ``irae_list``; the wording for each label is
    looked up in the module-level ``dict_irae_synsets`` (KeyError if a label is
    missing). The result is a template for the model, not parseable JSON: the
    values are instructions and every entry ends with a comma.
    """
    entries = [
        f'''\n"{irae_label}": Output 'Yes' if the patient has experienced {dict_irae_synsets[irae_label]} because of exposure to one or more immune checkpoint inhibitors. Otherwise, output 'No'.,'''
        for irae_label in irae_list
    ]
    return '{' + ''.join(entries) + '}'

# Show the answer template generated for the evaluated labels, for visual inspection.
print(get_irae_list_json(filter_list_irae_full))

def prompt_specific_json(note_text, ici_list, irae_list):
    """Build the zero-shot chat messages for one patient note.

    Returns a two-element list of ``{"role", "content"}`` dicts: a system message
    carrying the task description, ``ici_list`` and the answer template produced
    by ``get_irae_list_json(irae_list)``, followed by a user message carrying
    ``note_text``.
    """
    answer_template = get_irae_list_json(irae_list)

    system_content = f"""You are a clinical expert in identifying immune-related adverse events (irAEs) caused by immune checkpoint inhibitors (ICIs).                                           
                     You will receive as input a patient note corresponding to a patient who was treated or is currently treated with one or multiple immune checkpoint inhibitors (ICIs) from the following ICI list: {ici_list}. 
                     Your task is to determine if the patient note describes any of the immune-related adverse events (irAEs) experienced by the patient and caused by immune checkpoint inhibitors.
                     Output your response in a JSON format using the following structure: 
                     {answer_template}"""

    user_content = f"""Does the following patient note describe immune-related adverse 
                     events experienced by the patient? 
                     Patient note: {note_text}"""

    return [
        {"role": "system", "content": system_content},
        {"role": "user", "content": user_content},
    ]

#print(prompt_specific_json(" .. test .. ", ici_list, filter_list_irae_full))

## LLM output processing

In [None]:
## Returns <Flag , list01> True if successfully parsed
##
def convert_yn_dict_to_01_list(dict_irae_reponse, irae_list, nlp) :
    """Convert a parsed LLM answer into a list of 0/1 labels ordered like ``irae_list``.

    Parameters
    ----------
    dict_irae_reponse : parsed JSON answer, expected to be a dict keyed by irAE label.
    irae_list : labels to read, in output order.
    nlp : passed through to ``IRAEUtils.convert_llm_response_to_01``.

    Returns
    -------
    (flag, values): ``flag`` is True only when every label was converted. On
    failure ``values`` holds the labels converted so far followed by -1 for the
    offending label (or is empty when the answer is not a dict).
    """
    response_01_list = []

    # Valid JSON that is not an object (e.g. a list or a bare string) cannot be
    # indexed by label: report it as unparseable instead of raising.
    if not isinstance(dict_irae_reponse, dict):
        return False, response_01_list

    for irae_elem in irae_list :
        # A label the model left out is an invalid answer, not a reason to abort
        # the whole evaluation with a KeyError.
        if irae_elem not in dict_irae_reponse:
            response_01_list.append(-1)
            return False, response_01_list

        response_01_elem = IRAEUtils.convert_llm_response_to_01(dict_irae_reponse[irae_elem], nlp)
        #print(f"{irae_elem} : {dict_irae_reponse[irae_elem]} : {response_01_elem}")
        response_01_list.append(response_01_elem)
        # -1 marks a value that could not be converted; stop at the first one.
        if response_01_elem == -1 :
            return False, response_01_list

    return True, response_01_list

## Keep only the (gold, LLM) pairs whose LLM answer could be parsed
##
def filter_invalid_llm_responses(y_gold, y_llm_yn, irae_list, nlp) :
    """Drop notes whose LLM answer is not usable and binarise the remaining answers.

    Parameters
    ----------
    y_gold : iterable of gold label rows, aligned with ``y_llm_yn``.
    y_llm_yn : iterable of raw LLM answer strings (optionally wrapped in a
        Markdown code fence).
    irae_list : labels expected in every answer; defines the column order.
    nlp : passed through to ``convert_yn_dict_to_01_list``.

    Returns
    -------
    (y_gold_filter, y_llm_filter): two 2-D arrays with ``len(irae_list)`` columns
    and one row per retained note. Answers that are not valid JSON, or that
    ``convert_yn_dict_to_01_list`` rejects, are skipped together with their gold row.
    """
    #print(f"before: filter_invalid_llm_responses: {len(y_gold)} <> {len(y_llm_yn)}")
    kept_gold_rows = []
    kept_llm_rows = []

    for row_y_gold, llm_response_yn in zip(y_gold, y_llm_yn) :
        # Remove an optional Markdown fence, tagged ("```json") or bare ("```"),
        # tolerating whitespace around it.
        json_llm_response_yn = (
            llm_response_yn.strip()
            .removeprefix("```json")
            .removeprefix("```")
            .removesuffix("```")
            .strip()
        )
        if not IRAEUtils.is_json(json_llm_response_yn) :
            continue

        dict_irae_reponse = json.loads(json_llm_response_yn)
        flag, llm_response_01 = convert_yn_dict_to_01_list(dict_irae_reponse, irae_list, nlp)
        if flag :
            kept_gold_rows.append(row_y_gold)
            kept_llm_rows.append(llm_response_01)

    # Stack once at the end (stacking inside the loop copies the arrays on every
    # iteration). The empty seed keeps the (0, n_labels) shape when nothing is retained.
    empty_seed = np.empty((0, len(irae_list)))
    y_gold_filter = np.vstack([empty_seed, *kept_gold_rows])
    y_llm_filter = np.vstack([empty_seed, *kept_llm_rows])

    #print(f"after: filter_invalid_llm_responses: {len(y_gold_filter)} <> {len(y_llm_filter)}")
    return y_gold_filter, y_llm_filter

## Run LLMs

In [None]:
#X_all = df10[["text"]].values
#y_all = df10[filter_list_irae_full].values

X_all = df_gold_full[["text"]].values
y_all = df_gold_full[filter_list_irae_full].values

exception_list = []
y_llmresponses = []
for index, note in enumerate(X_all):
    # X_all is 2-D (a single "text" column), so each `note` is a 1-element array.
    # Formatting the array itself would put its repr ("['...']" with escaped
    # newlines) into the prompt; pass the note string instead.
    note_text = note[0]
    try:
        # temperature 0 and a fixed seed are used to keep the runs as repeatable as possible
        llm_response = llm.chat.completions.create(model = GPT_DEPLOYMENT,
            temperature=0.0, max_tokens=500, n = 1,
            frequency_penalty=0, presence_penalty=0, seed = 13,
            #top_p=1, ## reco: alter this param or temp but not both https://platform.openai.com/docs/api-reference/chat/create
            messages = prompt_specific_json(note_text, ici_list, filter_list_irae_full))

        print("Note: "+str(index))
        response_text = llm_response.choices[0].message.content.strip()
        print(response_text)
        y_llmresponses.append(response_text)
    except Exception as e:
        # Best effort: record the failure in place of the answer so that
        # y_llmresponses stays aligned with y_all; such entries are not valid JSON
        # and are dropped later by filter_invalid_llm_responses.
        error_text = "LLMException: "+str(e).strip().replace('\n', ' ')
        print(error_text)
        y_llmresponses.append(error_text)
        exception_list.append(error_text)


## Evaluation [full]: filtered irAE full labels

In [None]:
# Keep only the notes whose LLM answer could be parsed, and report how many were
# dropped so the reader knows what the scores below are computed on.
y_all_filter, y_llmresponses_filter = filter_invalid_llm_responses(y_all, y_llmresponses, filter_list_irae_full, nlp)
print(f"Valid LLM responses kept for evaluation: {len(y_all_filter)} of {len(y_all)}")

# Per-label precision / recall / F1 on the full (specific) irAE labels.
clf_report = classification_report(y_all_filter, y_llmresponses_filter, target_names = filter_list_irae_full, zero_division=0, output_dict=True)
df_clf_report = pd.DataFrame(clf_report).transpose()
display(df_clf_report)

df_irae_full_eval = IRAEUtils.irae_eval(y_all_filter, y_llmresponses_filter, filter_list_irae_full)
display(df_irae_full_eval)

# Create the deployment-specific output folder so the export cannot fail after the LLM run.
os.makedirs(path_eval, exist_ok=True)
df_clf_report.to_csv(f"{path_eval}EVAL-FULL.CLF-REPORT.{GPT_DEPLOYMENT}.csv", index=True)
df_irae_full_eval.to_csv(f"{path_eval}EVAL-FULL.DETAILED-REPORT.{GPT_DEPLOYMENT}.csv", index=False)

## Evaluation [large]: filtered irAE large labels

In [None]:
## Convert np.array with binary labels for irAE full to df with binary labels for irAE large
##
def convert_np_full_to_df_large(np_irae_full, dict_map_full2large, list_irae_full, list_irae_large) :
    """Collapse rows of binary specific-irAE flags into binary large-category columns.

    Parameters
    ----------
    np_irae_full : 2-D array-like; column ``i`` holds the flag for ``list_irae_full[i]``.
    dict_map_full2large : dict mapping each specific label to its large category.
    list_irae_full : specific labels, in the column order of ``np_irae_full``.
    list_irae_large : large categories; these are exactly the output columns.

    Returns
    -------
    DataFrame with one row per input row and one column per entry of
    ``list_irae_large``; a cell is 1 when any specific label mapped to that
    category equals 1 in the row, otherwise 0.

    Raises
    ------
    KeyError if a positive specific label is missing from ``dict_map_full2large``.
    """
    # Collect plain dicts and build the frame once instead of concatenating row by row.
    records = []
    for row in np_irae_full :
        dict_row = dict.fromkeys(list_irae_large, 0)
        for index_irae_full, label_irae_full in enumerate(list_irae_full):
            if row[index_irae_full] == 1 :
                label_irae_large = dict_map_full2large[label_irae_full]
                # A positive label whose category is outside list_irae_large must
                # not add a column: it would be NaN for the other rows and
                # misalign the frame with the evaluated categories.
                if label_irae_large in dict_row :
                    dict_row[label_irae_large] = 1
        records.append(dict_row)

    return pd.DataFrame(records, columns=list_irae_large)


# Map the retained gold labels and LLM predictions from full (specific) labels to large categories.
df_y_all_filter = convert_np_full_to_df_large(y_all_filter, dict_map_full2large, filter_list_irae_full, filter_list_irae_large)
df_y_llmresponses_filter = convert_np_full_to_df_large(y_llmresponses_filter, dict_map_full2large, filter_list_irae_full, filter_list_irae_large)

# Cast to integer indicator matrices before scoring.
np_y_all_filter = df_y_all_filter.to_numpy().astype(int)
np_y_llmresponses_filter = df_y_llmresponses_filter.to_numpy().astype(int)

# Per-category precision / recall / F1 on the large irAE categories.
final_clf_report = classification_report(np_y_all_filter, np_y_llmresponses_filter, target_names = filter_list_irae_large, zero_division=0, output_dict=True)
final_clf_report = pd.DataFrame(final_clf_report).transpose()
display(final_clf_report)

final_df_irae_large_eval = IRAEUtils.irae_eval(np_y_all_filter, np_y_llmresponses_filter, filter_list_irae_large)
display(final_df_irae_large_eval)

# Create the deployment-specific output folder so the export cannot fail after the LLM run.
os.makedirs(path_eval, exist_ok=True)
final_clf_report.to_csv(f"{path_eval}EVAL-LARGE.CLF-REPORT.{GPT_DEPLOYMENT}.csv", index=True)
final_df_irae_large_eval.to_csv(f"{path_eval}EVAL-LARGE.DETAILED-REPORT.{GPT_DEPLOYMENT}.csv", index=False)