## Import

In [3]:
import os
import regex
import sys
import json
import random
import tiktoken
import numpy as np
import pandas as pd
from tqdm import tqdm
from datetime import datetime

In [4]:
def seed_everything(seed):
    seed = int(seed)
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)

In [5]:
seed_everything(42)

In [6]:
from model.azure import (
    create_azure_batch_data,
    prepare_azure_batch_data,
    merge_azure_batch_data,
    parse_azure_batch_result,
    process_azure_result_to_task_result,
    cost_calculation_token
)

## Dataset

In [7]:
path_dir = "dataset_raw"
list_path_file = os.listdir(path_dir)
list_task_name = [file.split("/")[-1].replace(".SFT.json", "") for file in list_path_file if file.endswith(".SFT.json")]
dict_task_path = {task_name: os.path.join(path_dir, path_file) for task_name, path_file in zip(list_task_name, list_path_file)}
dict_task_path = dict(sorted(dict_task_path.items(), key=lambda x: int(x[0].split(".")[0] if "-" not in x[0].split(".")[0] else x[0].split("-")[0])))
print(f"Searching {len(dict_task_path)} files in {path_dir}")

Searching 52 files in dataset_raw


In [8]:
dict_task_path

{'1-3.ADE-Drug dosage': 'dataset_raw/1-3.ADE-Drug dosage.SFT.json',
 '1-2.ADE-ADE relation': 'dataset_raw/1-2.ADE-ADE relation.SFT.json',
 '1-1.ADE-ADE identification': 'dataset_raw/1-1.ADE-ADE identification.SFT.json',
 '5.BrainMRI-AIS': 'dataset_raw/29.EHRQA.sub_department.SFT.json',
 '6.Brateca.mortality': 'dataset_raw/6.Brateca.mortality.SFT.json',
 '6.Brateca.hospitalization': 'dataset_raw/6.Brateca.hospitalization.SFT.json',
 '7.Cantemist.NER': 'dataset_raw/102.iCorpus.SFT.json',
 '7.Cantemist.CODING': 'dataset_raw/100.GraSSCo_PHI.SFT.json',
 '7.Cantemist.Norm': 'dataset_raw/7.Cantemist.NER.SFT.json',
 '8.CARES.icd10_chapter': 'dataset_raw/8.CARES.icd10_chapter.SFT.json',
 '8.CARES.icd10_block': 'dataset_raw/29.EHRQA.primary_department.SFT.json',
 '8.CARES.area': 'dataset_raw/8.CARES.icd10_block.SFT.json',
 '8.CARES.icd10_sub_block': 'dataset_raw/23.cMedQA.SFT.json',
 '9.CHIP-CDEE': 'dataset_raw/9.CHIP-CDEE.SFT.json',
 '12.C-EMRS': 'dataset_raw/107.MIMIC-IV BHC.SFT.json',
 '19.Cl

## Data Preparation

### One test

In [7]:
model_name = "gpt-35-turbo-batch"
task_name = "1-1.ADE-ADE identification"
prompt_mode = 'direct-5-shot'

#### Formatted Data

In [8]:
with open(dict_task_path[task_name], "r") as file:
    list_dict_data = json.load(file)

In [9]:
if "shot" in prompt_mode:
    num_example = int(regex.findall(r"\d+", prompt_mode)[0])
    path_file_example = f"dataset_raw/example/{task_name}.example.json"
    with open(path_file_example, "r", encoding="utf-8") as f:
        list_dict_example = json.load(f)
    examples = list_dict_example[:num_example]
    print(f" - Prepare {num_example} examples")
else:
    examples = []
    print(" - No example")

 - Prepare 5 examples


In [10]:
list_dict_data_batch = prepare_azure_batch_data(
    task_name=task_name,
    model_name=model_name,
    prompt_mode=prompt_mode,
    split=['test'],
    list_dict_data=list_dict_data,
    examples=examples,
)

 - Loading tokenizer of gpt-35-turbo
 - Max token input: 13307
 - Max token output: 3072


In [11]:
list_dict_data_batch[0]

{'custom_id': "1-1.ADE-ADE identification|gpt-35-turbo-batch|direct-5-shot|['test']|0",
 'method': 'POST',
 'url': '/chat/completions',
 'body': {'model': 'gpt-35-turbo-batch',
  'messages': [{'role': 'system',
    'content': 'Given the clinical text, determine whether the text mentions adverse drug effects.\nReturn your answer in the following format. DO NOT GIVE ANY EXPLANATION:\nadverse drug effect: label\nThe optional list for "label" is ["Yes", "No"].\n\nExamples:\nInput:\nSevere sulfadiazine hypersensitivity in a child with reactivated congenital toxoplasmic chorioretinitis.\nOutput:\nadverse drug effect: Yes\n\nInput:\nMarked hyperkalemia was observed during and immediately after an infusion of arginine monohydrochloride in two patients with severe hepatic disease and moderate renal insufficiency.\nOutput:\nadverse drug effect: Yes\n\nInput:\nMany new serotonergic antidepressants have been introduced over the past decade.\nOutput:\nadverse drug effect: No\n\nInput:\nUntreated tu

In [12]:
list_dict_data_batch[0]['body']['messages']

[{'role': 'system',
  'content': 'Given the clinical text, determine whether the text mentions adverse drug effects.\nReturn your answer in the following format. DO NOT GIVE ANY EXPLANATION:\nadverse drug effect: label\nThe optional list for "label" is ["Yes", "No"].\n\nExamples:\nInput:\nSevere sulfadiazine hypersensitivity in a child with reactivated congenital toxoplasmic chorioretinitis.\nOutput:\nadverse drug effect: Yes\n\nInput:\nMarked hyperkalemia was observed during and immediately after an infusion of arginine monohydrochloride in two patients with severe hepatic disease and moderate renal insufficiency.\nOutput:\nadverse drug effect: Yes\n\nInput:\nMany new serotonergic antidepressants have been introduced over the past decade.\nOutput:\nadverse drug effect: No\n\nInput:\nUntreated tumors displayed continued growth.\nOutput:\nadverse drug effect: No\n\nInput:\nIncreasing the olanzapine dosage severely aggravated the symptoms of RLS.\nOutput:\nadverse drug effect: Yes\n\nRef

In [13]:
print(list_dict_data_batch[0]['body']['messages'][0]['content'])

Given the clinical text, determine whether the text mentions adverse drug effects.
Return your answer in the following format. DO NOT GIVE ANY EXPLANATION:
adverse drug effect: label
The optional list for "label" is ["Yes", "No"].

Examples:
Input:
Severe sulfadiazine hypersensitivity in a child with reactivated congenital toxoplasmic chorioretinitis.
Output:
adverse drug effect: Yes

Input:
Marked hyperkalemia was observed during and immediately after an infusion of arginine monohydrochloride in two patients with severe hepatic disease and moderate renal insufficiency.
Output:
adverse drug effect: Yes

Input:
Many new serotonergic antidepressants have been introduced over the past decade.
Output:
adverse drug effect: No

Input:
Untreated tumors displayed continued growth.
Output:
adverse drug effect: No

Input:
Increasing the olanzapine dosage severely aggravated the symptoms of RLS.
Output:
adverse drug effect: Yes

Refer to the provided examples, please generate the output for the f

In [14]:
print(list_dict_data_batch[0]['body']['messages'][1]['content'])

Input:
ANAMNESIS
Mujer de 67 años con antecedentes personales de hipotiroidismo en tratamiento con levotiroxina y fumadora activa de 12.5 paquetes/año. Consulta en Urgencias por sensación progresiva de “acorchamiento y hormigueos” en ambos miembros superiores e inferiores, así como una dificultad progresiva para la deambulación de 4 meses de evolución, asociando asimismo alteración de la memoria reciente desde el último mes

EXPLORACIÓN FÍSICA
Presentaba una exploración cardiopulmonar y abdominal normal. En la exploración neurológica destaca balance motor por grupos musculares conservado; arreflexia rotuliana y aquilea e hiporreflexia bicipital; tetrahipoestesia asimétrica (táctil, algésica, vibratoria y posicional) en patrón de “guante y calcetín altos” de predominio izquierdo; Romberg positivo y marcha con leve aumento de base de sustentación que impresiona ataxia sensitiva.

PRUEBAS COMPLEMENTARIAS
En las exploraciones complementarias, hemograma, bioquímica y coagulación no presenta

#### Create and Process

In [8]:
list_dict_data_batch = create_azure_batch_data(
    task_name=task_name,
    model_name=model_name,
    prompt_mode=prompt_mode,
    split="test",
    temperature=0,
    top_p=0,
    frequency_penalty=0,
    presence_penalty=0,
    max_token_input=100 * 1024,
    max_token_output=3072,
)

Task: 1-1.ADE-ADE identification
 - Test split: 2097 samples
 - Model: gpt-35-turbo-batch
 - Prompt: direct
 - No example
 - Loading tokenizer of gpt-35-turbo
 - Save 2097 to azure/input/gpt-35-turbo-batch/direct/1-1.ADE-ADE identification.batch.jsonl


In [9]:
list_dict_data_batch[0]

{'custom_id': '1-1.ADE-ADE identification|gpt-35-turbo-batch|direct|test|18869',
 'method': 'POST',
 'url': '/chat/completions',
 'body': {'model': 'gpt-35-turbo-batch',
  'messages': [{'role': 'system',
    'content': 'Given the clinical text, determine whether the text mentions adverse drug effects.\nReturn your answer in the following format. DO NOT GIVE ANY EXPLANATION:\nadverse drug effect: label\nThe optional list for "label" is ["Yes", "No"].'},
   {'role': 'user',
    'content': 'We present a patient who developed spontaneous pulmonary hemorrhage during thrombolytic therapy.'}],
  'temperature': 0,
  'top_p': 0,
  'frequency_penalty': 0,
  'presence_penalty': 0,
  'max_tokens': 3072,
  'seed': 42}}

#### Test preparation

In [10]:
split = "test"
path_file_task = f"dataset_fine/all/{task_name}.SFT.json"
with open(path_file_task, "r") as f:
    list_dict_data = json.load(f)
list_dict_data = [ dict_data for dict_data in list_dict_data if dict_data["split"] == split ]
print(f"Task: {task_name}, with {len(list_dict_data)} testing samples")
exp_name = f"{task_name}|{model_name}|{prompt_mode}|{split}"
list_dict_data_batch = prepare_azure_batch_data(
    task_name= task_name,
    model_name=model_name,
    prompt_mode=prompt_mode,
    split=split,
    list_dict_data=list_dict_data,
    examples=[],
    temperature=0,
    top_p=0,
    frequency_penalty=0,
    presence_penalty=0,
    max_token_input=100 * 1024,
    max_token_output=3072)

Task: 1-1.ADE-ADE identification, with 2097 testing samples
 - Loading tokenizer of gpt-35-turbo


In [11]:
list_dict_data_batch[0]

{'custom_id': '1-1.ADE-ADE identification|gpt-35-turbo-batch|direct|test|18869',
 'method': 'POST',
 'url': '/chat/completions',
 'body': {'model': 'gpt-35-turbo-batch',
  'messages': [{'role': 'system',
    'content': 'Given the clinical text, determine whether the text mentions adverse drug effects.\nReturn your answer in the following format. DO NOT GIVE ANY EXPLANATION:\nadverse drug effect: label\nThe optional list for "label" is ["Yes", "No"].'},
   {'role': 'user',
    'content': 'We present a patient who developed spontaneous pulmonary hemorrhage during thrombolytic therapy.'}],
  'temperature': 0,
  'top_p': 0,
  'frequency_penalty': 0,
  'presence_penalty': 0,
  'max_tokens': 3072,
  'seed': 42}}

## Porcess all

In [35]:
path_dir = "dataset_raw"
list_path_file = os.listdir(path_dir)
list_task_name = [file.split("/")[-1].replace(".SFT.json", "") for file in list_path_file if file.endswith(".SFT.json")]
dict_task_path = {task_name: os.path.join(path_dir, path_file) for task_name, path_file in zip(list_task_name, list_path_file)}
dict_task_path = dict(sorted(dict_task_path.items(), key=lambda x: int(x[0].split(".")[0] if "-" not in x[0].split(".")[0] else x[0].split("-")[0])))
print(f"Searching {len(dict_task_path)} files in {path_dir}")

Searching 49 files in dataset_raw


### Generate batch data jsonl for each task

In [30]:
def proceed_task(dict_task_path, model_name, prompt_mode, split="test"):
    for task_name, path_file_task in dict_task_path.items():
        list_dict_data_batch = create_azure_batch_data(
            task_name=task_name,
            model_name=model_name,
            prompt_mode=prompt_mode,
            split=split,
            temperature=0,
            top_p=0,
            frequency_penalty=0,
            presence_penalty=0,
            max_token_input=100 * 1024,
            max_token_output=2*1024,
        )
        print("========================================")

In [9]:
model_name = "gpt-35-turbo-batch"
proceed_task(dict_task_path, model_name, prompt_mode="direct-5-shot")

Task: 1-1.ADE-ADE identification
 - Test split: 2097 samples
 - Model: gpt-35-turbo-batch
 - Prompt: direct-5-shot
 - Prepare 5 examples
 - Loading tokenizer of gpt-35-turbo
 - Max token input: 16384
 - Max token output: 3072
 - Save 2097 to azure/input/gpt-35-turbo-batch/direct-5-shot/1-1.ADE-ADE identification.batch.jsonl
Task: 1-2.ADE-ADE relation
 - Test split: 428 samples
 - Model: gpt-35-turbo-batch
 - Prompt: direct-5-shot
 - Prepare 5 examples
 - Loading tokenizer of gpt-35-turbo
 - Max token input: 16384
 - Max token output: 3072
 - Save 428 to azure/input/gpt-35-turbo-batch/direct-5-shot/1-2.ADE-ADE relation.batch.jsonl
Task: 1-3.ADE-Drug dosage
 - Test split: 193 samples
 - Model: gpt-35-turbo-batch
 - Prompt: direct-5-shot
 - Prepare 5 examples
 - Loading tokenizer of gpt-35-turbo
 - Max token input: 16384
 - Max token output: 3072
 - Save 193 to azure/input/gpt-35-turbo-batch/direct-5-shot/1-3.ADE-Drug dosage.batch.jsonl
Task: 5.BrainMRI-AIS
 - Test split: 303 samples
 - M

In [10]:
model_name = "gpt-4o-batch"
proceed_task(dict_task_path, model_name, prompt_mode="direct-5-shot")

Task: 1-1.ADE-ADE identification
 - Test split: 2097 samples
 - Model: gpt-4o-batch
 - Prompt: direct-5-shot
 - Prepare 5 examples
 - Loading tokenizer of gpt-4o
 - Max token input: 102400
 - Max token output: 3072
 - Save 2097 to azure/input/gpt-4o-batch/direct-5-shot/1-1.ADE-ADE identification.batch.jsonl
Task: 1-2.ADE-ADE relation
 - Test split: 428 samples
 - Model: gpt-4o-batch
 - Prompt: direct-5-shot
 - Prepare 5 examples
 - Loading tokenizer of gpt-4o
 - Max token input: 102400
 - Max token output: 3072
 - Save 428 to azure/input/gpt-4o-batch/direct-5-shot/1-2.ADE-ADE relation.batch.jsonl
Task: 1-3.ADE-Drug dosage
 - Test split: 193 samples
 - Model: gpt-4o-batch
 - Prompt: direct-5-shot
 - Prepare 5 examples
 - Loading tokenizer of gpt-4o
 - Max token input: 102400
 - Max token output: 3072
 - Save 193 to azure/input/gpt-4o-batch/direct-5-shot/1-3.ADE-Drug dosage.batch.jsonl
Task: 5.BrainMRI-AIS
 - Test split: 303 samples
 - Model: gpt-4o-batch
 - Prompt: direct-5-shot
 - Prep

In [15]:
model_name = "gpt-35-turbo-batch"
proceed_task(dict_task_path, model_name, prompt_mode="direct")
proceed_task(dict_task_path, model_name, prompt_mode="cot")

Task: 1-1.ADE-ADE identification
 - Test split: 2097 samples
 - Model: gpt-35-turbo-batch
 - Prompt: direct
 - No example
 - Loading tokenizer of gpt-35-turbo
 - Save 2097 to azure/input/gpt-35-turbo-batch/direct/1-1.ADE-ADE identification.batch.jsonl
Task: 1-2.ADE-ADE relation
 - Test split: 428 samples
 - Model: gpt-35-turbo-batch
 - Prompt: direct
 - No example
 - Loading tokenizer of gpt-35-turbo
 - Save 428 to azure/input/gpt-35-turbo-batch/direct/1-2.ADE-ADE relation.batch.jsonl
Task: 1-3.ADE-Drug dosage
 - Test split: 193 samples
 - Model: gpt-35-turbo-batch
 - Prompt: direct
 - No example
 - Loading tokenizer of gpt-35-turbo
 - Save 193 to azure/input/gpt-35-turbo-batch/direct/1-3.ADE-Drug dosage.batch.jsonl
Task: 5.BrainMRI-AIS
 - Test split: 303 samples
 - Model: gpt-35-turbo-batch
 - Prompt: direct
 - No example
 - Loading tokenizer of gpt-35-turbo
 - Save 303 to azure/input/gpt-35-turbo-batch/direct/5.BrainMRI-AIS.batch.jsonl
Task: 6.Brateca.mortality
 - Test split: 3170 sa

In [16]:
model_name = "gpt-4o-batch"
proceed_task(dict_task_path, model_name, prompt_mode="direct")
proceed_task(dict_task_path, model_name, prompt_mode="cot")

Task: 1-1.ADE-ADE identification
 - Test split: 2097 samples
 - Model: gpt-4o-batch
 - Prompt: direct
 - No example
 - Loading tokenizer of gpt-4o
 - Save 2097 to azure/input/gpt-4o-batch/direct/1-1.ADE-ADE identification.batch.jsonl
Task: 1-2.ADE-ADE relation
 - Test split: 428 samples
 - Model: gpt-4o-batch
 - Prompt: direct
 - No example
 - Loading tokenizer of gpt-4o
 - Save 428 to azure/input/gpt-4o-batch/direct/1-2.ADE-ADE relation.batch.jsonl
Task: 1-3.ADE-Drug dosage
 - Test split: 193 samples
 - Model: gpt-4o-batch
 - Prompt: direct
 - No example
 - Loading tokenizer of gpt-4o
 - Save 193 to azure/input/gpt-4o-batch/direct/1-3.ADE-Drug dosage.batch.jsonl
Task: 5.BrainMRI-AIS
 - Test split: 303 samples
 - Model: gpt-4o-batch
 - Prompt: direct
 - No example
 - Loading tokenizer of gpt-4o
 - Save 303 to azure/input/gpt-4o-batch/direct/5.BrainMRI-AIS.batch.jsonl
Task: 6.Brateca.mortality
 - Test split: 3170 samples
 - Model: gpt-4o-batch
 - Prompt: direct
 - No example
 - Loading 

### Merge multiple task into one batch data jsonl

#### gpt-35-turbo-batch

In [29]:
prompt_mode = "direct"
model = "gpt-35-turbo-batch"
merge_azure_batch_data(model, prompt_mode, max_lines=50000, max_size_mb=180)

Created: azure/input/merged/gpt-35-turbo-batch/direct/gpt-35-turbo-batch.direct.chunk_0.jsonl with 49708 lines and size 137.82 MB
Created: azure/input/merged/gpt-35-turbo-batch/direct/gpt-35-turbo-batch.direct.chunk_1.jsonl with 49675 lines and size 73.35 MB
Created: azure/input/merged/gpt-35-turbo-batch/direct/gpt-35-turbo-batch.direct.chunk_2.jsonl with 30209 lines and size 74.64 MB


In [None]:
prompt_mode = "cot"
model = "gpt-35-turbo-batch"
merge_azure_batch_data(model, prompt_mode, max_lines=50000, max_size_mb=180)

In [31]:
prompt_mode = "direct-5-shot"
model = "gpt-35-turbo-batch"
merge_azure_batch_data(model, prompt_mode, max_lines=50000, max_size_mb=180)

Created: azure/input/merged/gpt-35-turbo-batch/direct-5-shot/gpt-35-turbo-batch.direct-5-shot.chunk_0.jsonl with 6191 lines and size 161.19 MB
Created: azure/input/merged/gpt-35-turbo-batch/direct-5-shot/gpt-35-turbo-batch.direct-5-shot.chunk_1.jsonl with 6015 lines and size 176.35 MB
Created: azure/input/merged/gpt-35-turbo-batch/direct-5-shot/gpt-35-turbo-batch.direct-5-shot.chunk_2.jsonl with 26240 lines and size 149.22 MB
Created: azure/input/merged/gpt-35-turbo-batch/direct-5-shot/gpt-35-turbo-batch.direct-5-shot.chunk_3.jsonl with 33606 lines and size 156.75 MB
Created: azure/input/merged/gpt-35-turbo-batch/direct-5-shot/gpt-35-turbo-batch.direct-5-shot.chunk_4.jsonl with 28040 lines and size 149.37 MB
Created: azure/input/merged/gpt-35-turbo-batch/direct-5-shot/gpt-35-turbo-batch.direct-5-shot.chunk_5.jsonl with 18283 lines and size 116.24 MB
Created: azure/input/merged/gpt-35-turbo-batch/direct-5-shot/gpt-35-turbo-batch.direct-5-shot.chunk_6.jsonl with 11217 lines and size 75.5

#### gpt-4o-batch

In [32]:
prompt_mode = "direct"
model = "gpt-4o-batch"
merge_azure_batch_data(model, prompt_mode, max_lines=50000, max_size_mb=180)

Created: azure/input/merged/gpt-4o-batch/direct/gpt-4o-batch.direct.chunk_0.jsonl with 49708 lines and size 137.25 MB
Created: azure/input/merged/gpt-4o-batch/direct/gpt-4o-batch.direct.chunk_1.jsonl with 49675 lines and size 72.78 MB
Created: azure/input/merged/gpt-4o-batch/direct/gpt-4o-batch.direct.chunk_2.jsonl with 30209 lines and size 74.29 MB


In [33]:
prompt_mode = "cot"
model = "gpt-4o-batch"
merge_azure_batch_data(model, prompt_mode, max_lines=50000, max_size_mb=180)

Created: azure/input/merged/gpt-4o-batch/cot/gpt-4o-batch.cot.chunk_0.jsonl with 49708 lines and size 140.75 MB
Created: azure/input/merged/gpt-4o-batch/cot/gpt-4o-batch.cot.chunk_1.jsonl with 49675 lines and size 76.34 MB
Created: azure/input/merged/gpt-4o-batch/cot/gpt-4o-batch.cot.chunk_2.jsonl with 30209 lines and size 76.40 MB


In [15]:
prompt_mode = "direct-5-shot"
model = "gpt-4o-batch"
merge_azure_batch_data(model, prompt_mode, max_lines=80000, max_size_mb=190)

Created: azure/input/merged/gpt-4o-batch/direct-5-shot/gpt-4o-batch.direct-5-shot.chunk_0.jsonl with 6191 lines and size 161.12 MB
Created: azure/input/merged/gpt-4o-batch/direct-5-shot/gpt-4o-batch.direct-5-shot.chunk_1.jsonl with 6981 lines and size 185.43 MB
Created: azure/input/merged/gpt-4o-batch/direct-5-shot/gpt-4o-batch.direct-5-shot.chunk_2.jsonl with 25274 lines and size 139.78 MB
Created: azure/input/merged/gpt-4o-batch/direct-5-shot/gpt-4o-batch.direct-5-shot.chunk_3.jsonl with 33606 lines and size 156.37 MB
Created: azure/input/merged/gpt-4o-batch/direct-5-shot/gpt-4o-batch.direct-5-shot.chunk_4.jsonl with 33874 lines and size 184.12 MB
Created: azure/input/merged/gpt-4o-batch/direct-5-shot/gpt-4o-batch.direct-5-shot.chunk_5.jsonl with 23666 lines and size 156.34 MB


## Evaluation preparation

In [7]:
path_dir_azure_result = "azure/output"
path_dir_save = "result_test"

In [8]:
process_azure_result_to_task_result(path_dir_azure_result, path_dir_save)

Task: 104.HealthCareMagic-100k
 - Model Name: gpt-35-turbo-batch
 - Prompt Mode: direct-5-shot
 - Split: test
----------------------------------------
 - Num of data: 11217
 - Num of batch: 11217
 - Num of result: 11217
 - Matched: 11217
 - All matched.
 - Saved: result_test/104.HealthCareMagic-100k/gpt-35-turbo/104.HealthCareMagic-100k-direct-5-shot-greedy-42.result.json
Task: 1-1.ADE-ADE identification
 - Model Name: gpt-35-turbo-batch
 - Prompt Mode: cot
 - Split: test
----------------------------------------
 - Num of data: 2097
 - Num of batch: 2097
 - Num of result: 2097
 - Matched: 2097
 - All matched.
 - Saved: result_test/1-1.ADE-ADE identification/gpt-35-turbo/1-1.ADE-ADE identification-cot-greedy-42.result.json
Task: 1-2.ADE-ADE relation
 - Model Name: gpt-35-turbo-batch
 - Prompt Mode: cot
 - Split: test
----------------------------------------
 - Num of data: 428
 - Num of batch: 428
 - Num of result: 428
 - Matched: 428
 - All matched.
 - Saved: result_test/1-2.ADE-ADE re

## Mising result

In [13]:
list_path_file = [
    os.path.join(path_dir_azure_result, path_file)
    for path_file in os.listdir(path_dir_azure_result)
    if path_file.endswith(".jsonl")
]
dict_model_prompt_result = {
    "gpt-35-turbo-batch": {
        "direct": [],
        "cot": [],
        "direct-5-shot": [],
    },
    "gpt-4o-batch": {
        "direct": [],
        "cot": [],
        "direct-5-shot": [],
    }
}
for path_file in tqdm(list_path_file):
    with open(path_file, "r", encoding="utf-8") as f:
        list_dict_result = [json.loads(line) for line in f.readlines()]
    dict_result = list_dict_result[0]
    task_name, model_name, prompt_mode, split, id = dict_result['custom_id'].split("|")
    dict_model_prompt_result[model_name][prompt_mode].append(path_file)

100%|██████████| 25/25 [00:25<00:00,  1.01s/it]


In [15]:
dict_model_prompt_result

{'gpt-35-turbo-batch': {'direct': ['azure/output/file-4f34fb37-fc17-4273-8823-c463092641ad.jsonl',
   'azure/output/file-3072394f-90ab-4762-abce-65636c96f265.jsonl',
   'azure/output/file-be494034-2a2c-4db9-ba9e-16c90f3f2b33.jsonl'],
  'cot': ['azure/output/file-4658a44f-42c9-48ee-abe6-5d8b8b01e014.jsonl',
   'azure/output/file-69ffa97d-1c68-45fa-a678-547d68edc8cb.jsonl',
   'azure/output/file-1ce9d616-383d-45fb-af29-9e26c958d06c.jsonl'],
  'direct-5-shot': ['azure/output/file-ca075f22-6ec6-447a-bfd8-c04e9605f681.jsonl',
   'azure/output/file-b39c2e02-2bd7-4983-8f74-045fc50fcf50.jsonl',
   'azure/output/file-9ed9d259-3067-40dd-96b4-4a92bb31b46a.jsonl',
   'azure/output/file-c562680f-1cde-431a-bd27-991851f8a4ce.jsonl',
   'azure/output/file-8fd5ab91-800e-4f8f-b74a-8c0ab51bbcf3.jsonl',
   'azure/output/file-6eed46d6-bc0d-4700-9a30-b9f236276511.jsonl',
   'azure/output/file-f2370c62-ee40-4851-82fc-808c0d8a10b3.jsonl']},
 'gpt-4o-batch': {'direct': ['azure/output/file-59b186d8-74d8-4c2a-b1

In [16]:
path_dir_save = "result_test"
list_task = os.listdir(path_dir_save)
for task_name in list_task:
    print(f"Task: {task_name}")
    path_dir_task = os.path.join(path_dir_save, task_name)
    list_path_dir_model = os.listdir(path_dir_task)
    # gpt-4o
    model_name = "gpt-4o"
    if model_name not in list_path_dir_model:
        print(f" - Missing model: {model_name}")
    else:
        list_path_file = os.listdir(os.path.join(path_dir_task, model_name))
        list_prompt_mode = [ path_file.split(task_name+'-')[1].split("-greedy")[0] for path_file in list_path_file ]
        string_missing = ""
        if "direct" not in list_prompt_mode:
            string_missing+= "direct, "
        if "cot" not in list_prompt_mode:
            string_missing+= "cot, "
        if "direct-5-shot" not in list_prompt_mode:
            string_missing+= "direct-5-shot"
        if string_missing:
            print(f" - {model_name} miss: {string_missing}")
    # gpt-35-turbo
    model_name = "gpt-35-turbo"
    if model_name not in list_path_dir_model:
        print(f" - Missing model: {model_name}")
    else:
        list_path_file = os.listdir(os.path.join(path_dir_task, model_name))
        list_prompt_mode = [ path_file.split(task_name+'-')[1].split("-greedy")[0] for path_file in list_path_file ]
        string_missing = ""
        if "direct" not in list_prompt_mode:
            string_missing+= "direct, "
        if "cot" not in list_prompt_mode:
            string_missing+= "cot, "
        if "direct-5-shot" not in list_prompt_mode:
            string_missing+= "direct-5-shot"
        if string_missing:
            print(f" - {model_name} miss: {string_missing}")

Task: 104.HealthCareMagic-100k
Task: 1-1.ADE-ADE identification
Task: 1-2.ADE-ADE relation
Task: 1-3.ADE-Drug dosage
Task: 5.BrainMRI-AIS
Task: 6.Brateca.mortality
 - gpt-35-turbo miss: direct-5-shot
Task: 6.Brateca.hospitalization
Task: 7.Cantemist.NER
Task: 7.Cantemist.CODING
Task: 7.Cantemist.Norm
Task: 8.CARES.area
Task: 8.CARES.icd10_block
Task: 8.CARES.icd10_chapter
Task: 8.CARES.icd10_sub_block
Task: 9.CHIP-CDEE
Task: 12.C-EMRS
Task: 19.ClinicalNotes-UPMC
Task: 22.CLIP
Task: 23.cMedQA
Task: 26.DialMed
Task: 28.MIE
Task: 29.EHRQA.primary_department
Task: 29.EHRQA.qa
Task: 29.EHRQA.sub_department
Task: 31.Ex4CDS
Task: 33.GOUT-CC.consensus
Task: 33.GOUT-CC.predict
Task: 43.IMCS-V2-NER
Task: 81.CHIP-CDN
Task: 99.CARDIO:DE
 - gpt-35-turbo miss: direct-5-shot
Task: 101.IFMIR.IncidentType
Task: 101.IFMIR.NER
Task: 100.GraSSCo_PHI
Task: 101.IFMIR.NER_factuality
Task: 102.iCorpus
Task: 103.icliniq-10k
Task: 82.CHIP-CTC
Task: 83.CHIP-MDCFNPC
Task: 84.MedDG
Task: 85.IMCS-V2-SR
Task: 86.IMC

## Cost calculation

In [37]:
list_path_file = [
    os.path.join(path_dir_azure_result, path_file)
    for path_file in os.listdir(path_dir_azure_result)
    if path_file.endswith(".jsonl")
]
list_path_file

['azure/output/file-ca075f22-6ec6-447a-bfd8-c04e9605f681.jsonl',
 'azure/output/file-4658a44f-42c9-48ee-abe6-5d8b8b01e014.jsonl',
 'azure/output/file-4f34fb37-fc17-4273-8823-c463092641ad.jsonl',
 'azure/output/file-80f2b63a-1c83-40b4-99a2-ec556b4bb24a.jsonl',
 'azure/output/file-69ffa97d-1c68-45fa-a678-547d68edc8cb.jsonl',
 'azure/output/file-3072394f-90ab-4762-abce-65636c96f265.jsonl',
 'azure/output/file-be494034-2a2c-4db9-ba9e-16c90f3f2b33.jsonl',
 'azure/output/file-59b186d8-74d8-4c2a-b1df-e88ea8029225.jsonl',
 'azure/output/file-2b078840-bc72-41e9-af6a-6924a52361a0.jsonl',
 'azure/output/file-1ce9d616-383d-45fb-af29-9e26c958d06c.jsonl',
 'azure/output/file-b39c2e02-2bd7-4983-8f74-045fc50fcf50.jsonl',
 'azure/output/file-9ed9d259-3067-40dd-96b4-4a92bb31b46a.jsonl',
 'azure/output/file-c562680f-1cde-431a-bd27-991851f8a4ce.jsonl',
 'azure/output/file-8fd5ab91-800e-4f8f-b74a-8c0ab51bbcf3.jsonl',
 'azure/output/file-6eed46d6-bc0d-4700-9a30-b9f236276511.jsonl',
 'azure/output/file-12880

In [38]:
dict_model_token = {}
for path_file in tqdm(list_path_file):
    with open(path_file, "r") as f:
        list_dict_result = [json.loads(line) for line in f]
    dict_result = list_dict_result[0]
    task, model_name, prompt_mode, split, id = dict_result["custom_id"].split("|")
    model_name = model_name.replace("-batch", "")
    for dict_result in list_dict_result:
        if model_name not in dict_model_token:
            dict_model_token[model_name] = {"token_input": 0, "token_output": 0}
        token_input = dict_result["response"]["body"]["usage"]["prompt_tokens"]
        token_output = dict_result["response"]["body"]["usage"]["completion_tokens"]
        dict_model_token[model_name]["token_input"] += token_input
        dict_model_token[model_name]["token_output"] += token_output

100%|██████████| 20/20 [00:22<00:00,  1.15s/it]


In [39]:
for model, dict_token in dict_model_token.items():
    cost = cost_calculation_token(dict_token["token_input"], dict_token["token_output"], model, flag_batch=True)
    token_input_m, token_output_m = dict_token["token_input"]/1e6, dict_token["token_output"]/1e6
    print(f"Model: {model}, Cost: ${cost:.2f}, with Input token-{token_input_m:.2f} and Output token-{token_output_m:.2f}")

Model: gpt-35-turbo, Cost: $128.19, with Input token-393.04 and Output token-39.91
Model: gpt-4o, Cost: $454.33, with Input token-187.26 and Output token-44.05


## Supp

In [7]:
dict_task_path = {
    "6.Brateca.mortality": "dataset_raw/6.Brateca.mortality.SFT.json",
    "6.Brateca.hospitalization": "dataset_raw/6.Brateca.hospitalization.SFT.json",
    "99.CARDIO:DE": "dataset_raw/99.CARDIO:DE.SFT.json",
}

In [8]:
def proceed_task(dict_task_path, model_name, prompt_mode, split="test"):
    for task_name, path_file_task in dict_task_path.items():
        list_dict_data_batch = create_azure_batch_data(
            task_name=task_name,
            model_name=model_name,
            prompt_mode=prompt_mode,
            split=split,
            temperature=0,
            top_p=0,
            frequency_penalty=0,
            presence_penalty=0,
            max_token_input=100 * 1024,
            max_token_output=2 * 1024,
        )
        print("========================================")

In [9]:
model_name = "gpt-35-turbo-batch"
proceed_task(dict_task_path, model_name, prompt_mode="direct")
proceed_task(dict_task_path, model_name, prompt_mode="cot")
proceed_task(dict_task_path, model_name, prompt_mode="direct-5-shot")

Task: 6.Brateca.mortality
 - Test split: 3170 samples
 - Model: gpt-35-turbo-batch
 - Prompt: direct
 - No example
 - Loading tokenizer of gpt-35-turbo
 - Max token input: 14336
 - Max token output: 2048
 - Save 3170 to azure/input/gpt-35-turbo-batch/direct/6.Brateca.mortality.batch.jsonl
Task: 6.Brateca.hospitalization
 - Test split: 3183 samples
 - Model: gpt-35-turbo-batch
 - Prompt: direct
 - No example
 - Loading tokenizer of gpt-35-turbo
 - Max token input: 14336
 - Max token output: 2048
 - Save 3183 to azure/input/gpt-35-turbo-batch/direct/6.Brateca.hospitalization.batch.jsonl
Task: 99.CARDIO:DE
 - Test split: 380 samples
 - Model: gpt-35-turbo-batch
 - Prompt: direct
 - No example
 - Loading tokenizer of gpt-35-turbo
 - Max token input: 14336
 - Max token output: 2048
 - Save 380 to azure/input/gpt-35-turbo-batch/direct/99.CARDIO:DE.batch.jsonl
Task: 6.Brateca.mortality
 - Test split: 3170 samples
 - Model: gpt-35-turbo-batch
 - Prompt: cot
 - Transform the instruction to the 

In [10]:
model_name = "gpt-35-turbo-batch"
dict_list_file = {
    # direct
    "direct": [f"azure/input/{model_name}/direct/6.Brateca.mortality.batch.jsonl",
    f"azure/input/{model_name}/direct/6.Brateca.hospitalization.batch.jsonl",
    f"azure/input/{model_name}/direct/99.CARDIO:DE.batch.jsonl"],
    # cot
    "cot": [f"azure/input/{model_name}/cot/6.Brateca.mortality.batch.jsonl",
    f"azure/input/{model_name}/cot/6.Brateca.hospitalization.batch.jsonl",
    f"azure/input/{model_name}/cot/99.CARDIO:DE.batch.jsonl"],
    # few-shot
    "direct-5-shot.0": [f"azure/input/{model_name}/direct-5-shot/6.Brateca.mortality.batch.jsonl",
    f"azure/input/{model_name}/direct-5-shot/99.CARDIO:DE.batch.jsonl"],
    "direct-5-shot.1":[f"azure/input/{model_name}/direct-5-shot/6.Brateca.hospitalization.batch.jsonl",]
}
# merge the files
for prompt_mode, list_file in dict_list_file.items():
    list_dict_data = []
    for file in list_file:
        with open(file, "r", encoding="utf-8") as f:
            list_dict_data += [json.loads(line) for line in f.readlines()]
    print(f"Total {len(list_dict_data)} samples")
    # save the merged file
    path_file_save = f"azure/input_supp/{model_name}/{model_name}.{prompt_mode}.supp.jsonl"
    os.makedirs(os.path.dirname(path_file_save), exist_ok=True)
    with open(path_file_save, "w", encoding="utf-8") as f:
        for dict_data in list_dict_data:
            f.write(json.dumps(dict_data, ensure_ascii=False) + "\n")

Total 6733 samples
Total 6733 samples
Total 3550 samples
Total 3183 samples


In [9]:
path_dir_azure_result = "azure/output_supp"
path_dir_save = "result_supp"

In [10]:
process_azure_result_to_task_result(path_dir_azure_result, path_dir_save)

Task: 6.Brateca.mortality
 - Model Name: gpt-35-turbo-batch
 - Prompt Mode: cot
 - Split: test
----------------------------------------
 - Num of data: 3170
 - Num of batch: 3170
 - Num of result: 3169
 - Missing: 29355
 - Matched: 3169
 - Lost 1 samples.
 - Saved: result_supp/6.Brateca.mortality/gpt-35-turbo/6.Brateca.mortality-cot-greedy-42.result.json
Task: 6.Brateca.hospitalization
 - Model Name: gpt-35-turbo-batch
 - Prompt Mode: cot
 - Split: test
----------------------------------------
 - Num of data: 3183
 - Num of batch: 3183
 - Num of result: 3183
 - Matched: 3183
 - All matched.
 - Saved: result_supp/6.Brateca.hospitalization/gpt-35-turbo/6.Brateca.hospitalization-cot-greedy-42.result.json
Task: 99.CARDIO:DE
 - Model Name: gpt-35-turbo-batch
 - Prompt Mode: cot
 - Split: test
----------------------------------------
 - Num of data: 380
 - Num of batch: 380
 - Num of result: 380
 - Matched: 380
 - All matched.
 - Saved: result_supp/99.CARDIO:DE/gpt-35-turbo/99.CARDIO:DE-cot-

## Supp-again

In [13]:
dict_task_path = {
    "6.Brateca.hospitalization": "dataset_raw/6.Brateca.hospitalization.SFT.json",
    "17-1.CLEF_eHealth_2020_CodiEsp_corpus-ICD-10-CM": "dataset_raw/17-1.CLEF_eHealth_2020_CodiEsp_corpus-ICD-10-CM.SFT.json",
    "17-2.CLEF_eHealth_2020_CodiEsp_corpus-ICD-10-PCS": "dataset_raw/17-2.CLEF_eHealth_2020_CodiEsp_corpus-ICD-10-PCS.SFT.json",
    "31.Ex4CDS": "dataset_raw/31.Ex4CDS.SFT.json",
    "91-2.CAS.evidence": "dataset_raw/91-2.CAS.evidence.SFT.json",
    "90-8.n2c2 2014 - Heart Disease Challenge - Medication": "dataset_raw/90-8.n2c2 2014 - Heart Disease Challenge - Medication.SFT.json",
    # only few-shot
    # "22.CLIP": "dataset_raw/22.CLIP.SFT.json",
    # "28.MIE": "dataset_raw/28.MIE.SFT.json",
    # "91-1.CAS.label": "dataset_raw/91-1.CAS.label.SFT.json",
}

### process for each task

In [14]:
def proceed_task(dict_task_path, model_name, prompt_mode, split="test"):
    for task_name, path_file_task in dict_task_path.items():
        list_dict_data_batch = create_azure_batch_data(
            task_name=task_name,
            model_name=model_name,
            prompt_mode=prompt_mode,
            split=split,
            path_dir_raw="dataset_raw",
            path_dir_batch="azure/input_supp_again",
            temperature=0,
            top_p=0,
            frequency_penalty=0,
            presence_penalty=0,
            max_token_input=100 * 1024,
            max_token_output=2 * 1024,
        )
        print("========================================")

In [15]:
model_name = "gpt-35-turbo-batch"
proceed_task(dict_task_path, model_name, prompt_mode="direct")
proceed_task(dict_task_path, model_name, prompt_mode="cot")
# proceed_task(dict_task_path, model_name, prompt_mode="direct-5-shot")

Task: 6.Brateca.hospitalization
 - Test split: 3183 samples
 - Model: gpt-35-turbo-batch
 - Prompt: direct
 - No example
 - Loading tokenizer of gpt-35-turbo
 - Max token input: 14331
 - Max token output: 2048
 - Save 3183 to azure/input_supp_again/gpt-35-turbo-batch/direct/6.Brateca.hospitalization.batch.jsonl
Task: 17-1.CLEF_eHealth_2020_CodiEsp_corpus-ICD-10-CM
 - Test split: 250 samples
 - Model: gpt-35-turbo-batch
 - Prompt: direct
 - No example
 - Loading tokenizer of gpt-35-turbo
 - Max token input: 14331
 - Max token output: 2048
 - Save 250 to azure/input_supp_again/gpt-35-turbo-batch/direct/17-1.CLEF_eHealth_2020_CodiEsp_corpus-ICD-10-CM.batch.jsonl
Task: 17-2.CLEF_eHealth_2020_CodiEsp_corpus-ICD-10-PCS
 - Test split: 224 samples
 - Model: gpt-35-turbo-batch
 - Prompt: direct
 - No example
 - Loading tokenizer of gpt-35-turbo
 - Max token input: 14331
 - Max token output: 2048
 - Save 224 to azure/input_supp_again/gpt-35-turbo-batch/direct/17-2.CLEF_eHealth_2020_CodiEsp_corpu

In [None]:
model_name = "gpt-4o-batch"
proceed_task(dict_task_path, model_name, prompt_mode="direct")
proceed_task(dict_task_path, model_name, prompt_mode="cot")
# proceed_task(dict_task_path, model_name, prompt_mode="direct-5-shot")

Task: 6.Brateca.hospitalization
 - Test split: 3183 samples
 - Model: gpt-4o-batch
 - Prompt: direct
 - No example
 - Loading tokenizer of gpt-4o
 - Max token input: 102400
 - Max token output: 2048
 - Save 3183 to azure/input_supp_again/gpt-4o-batch/direct/6.Brateca.hospitalization.batch.jsonl
Task: 17-1.CLEF_eHealth_2020_CodiEsp_corpus-ICD-10-CM
 - Test split: 250 samples
 - Model: gpt-4o-batch
 - Prompt: direct
 - No example
 - Loading tokenizer of gpt-4o
 - Max token input: 102400
 - Max token output: 2048
 - Save 250 to azure/input_supp_again/gpt-4o-batch/direct/17-1.CLEF_eHealth_2020_CodiEsp_corpus-ICD-10-CM.batch.jsonl
Task: 17-2.CLEF_eHealth_2020_CodiEsp_corpus-ICD-10-PCS
 - Test split: 224 samples
 - Model: gpt-4o-batch
 - Prompt: direct
 - No example
 - Loading tokenizer of gpt-4o
 - Max token input: 102400
 - Max token output: 2048
 - Save 224 to azure/input_supp_again/gpt-4o-batch/direct/17-2.CLEF_eHealth_2020_CodiEsp_corpus-ICD-10-PCS.batch.jsonl
Task: 31.Ex4CDS
 - Test sp

### merge

In [16]:
model_name = "gpt-35-turbo-batch"
prompt_mode = "direct"
merge_azure_batch_data(model_name, prompt_mode, path_dir_raw="azure/input_supp_again", path_dir_merged="azure/input_supp_again/merged", max_lines=50000, max_size_mb=190)
prompt_mode = "cot"
merge_azure_batch_data(model_name, prompt_mode, path_dir_raw="azure/input_supp_again", path_dir_merged="azure/input_supp_again/merged", max_lines=50000, max_size_mb=190)
# prompt_mode = "direct-5-shot"
# merge_azure_batch_data(model_name, prompt_mode, path_dir_raw="azure/input_supp_again", path_dir_merged="azure/input_supp_again/merged", max_lines=50000, max_size_mb=190)

Created: azure/input_supp_again/merged/gpt-35-turbo-batch/direct/gpt-35-turbo-batch.direct.chunk_0.jsonl with 5215 lines and size 39.63 MB
Created: azure/input_supp_again/merged/gpt-35-turbo-batch/cot/gpt-35-turbo-batch.cot.chunk_0.jsonl with 5215 lines and size 40.00 MB


In [19]:
model_name = "gpt-4o-batch"
prompt_mode = "direct"
merge_azure_batch_data(model_name, prompt_mode, path_dir_raw="azure/input_supp_again", path_dir_merged="azure/input_supp_again/merged", max_lines=50000, max_size_mb=190)
prompt_mode = "cot"
merge_azure_batch_data(model_name, prompt_mode, path_dir_raw="azure/input_supp_again", path_dir_merged="azure/input_supp_again/merged", max_lines=50000, max_size_mb=190)
# prompt_mode = "direct-5-shot"
# merge_azure_batch_data(model_name, prompt_mode, path_dir_raw="azure/input_supp_again", path_dir_merged="azure/input_supp_again/merged", max_lines=50000, max_size_mb=190)

Created: azure/input_supp_again/merged/gpt-4o-batch/direct/gpt-4o-batch.direct.chunk_0.jsonl with 5215 lines and size 39.71 MB
Created: azure/input_supp_again/merged/gpt-4o-batch/cot/gpt-4o-batch.cot.chunk_0.jsonl with 5215 lines and size 40.07 MB


### parse result

In [None]:
path_dir_azure_result = "azure/output"
path_dir_task_save = "result"
path_dir_raw = "dataset_raw"
path_dir_batch = "azure/input_supp_again"

In [17]:
process_azure_result_to_task_result(
    path_dir_azure_result=path_dir_azure_result,
    path_dir_task_save=path_dir_task_save,
    path_dir_raw=path_dir_raw,
    path_dir_batch=path_dir_batch,
)

Task: 6.Brateca.hospitalization
 - Model Name: gpt-35-turbo-batch
 - Prompt Mode: cot
 - Split: test
----------------------------------------
 - Num of data: 3183
 - Num of batch: 3183
 - Num of result: 3183
 - Matched: 3183
 - All matched.
 - Saved: result/6.Brateca.hospitalization/gpt-35-turbo/6.Brateca.hospitalization-cot-greedy-42.result.json
Task: 17-1.CLEF_eHealth_2020_CodiEsp_corpus-ICD-10-CM
 - Model Name: gpt-35-turbo-batch
 - Prompt Mode: cot
 - Split: test
----------------------------------------
 - Num of data: 250
 - Num of batch: 250
 - Num of result: 250
 - Matched: 250
 - All matched.
 - Saved: result/17-1.CLEF_eHealth_2020_CodiEsp_corpus-ICD-10-CM/gpt-35-turbo/17-1.CLEF_eHealth_2020_CodiEsp_corpus-ICD-10-CM-cot-greedy-42.result.json
Task: 17-2.CLEF_eHealth_2020_CodiEsp_corpus-ICD-10-PCS
 - Model Name: gpt-35-turbo-batch
 - Prompt Mode: cot
 - Split: test
----------------------------------------
 - Num of data: 224
 - Num of batch: 224
 - Num of result: 224
 - Matched: 

## Supp-again-again

In [None]:
path_dir_batch="azure/input_supp_again_again"

In [21]:
dict_task_path = {
    # "8.CARES.icd10_block": "dataset_raw/8.CARES.icd10_block.SFT.json",
    # "8.CARES.icd10_sub_block": "dataset_raw/8.CARES.icd10_sub_block.SFT.json",
    # "29.EHRQA.primary_department": "dataset_raw/29.EHRQA.primary_department.SFT.json",
    # "29.EHRQA.sub_department": "dataset_raw/29.EHRQA.sub_department.SFT.json",
    # "29.EHRQA.qa": "dataset_raw/29.EHRQA.qa.SFT.json",
    # "33.GOUT-CC.consensus": "dataset_raw/33.GOUT-CC.consensus.SFT.json",
    # "105.MIMIC-IV CDM": "dataset_raw/105.MIMIC-IV CDM.SFT.json",
    # "106.MIMIC-III Outcome.LoS": "dataset_raw/106.MIMIC-III Outcome.LoS.SFT.json",
    # "106.MIMIC-III Outcome.Mortality": "dataset_raw/106.MIMIC-III Outcome.Mortality.SFT.json",
    # "108.MIMIC-IV DiReCT.PDD": "dataset_raw/108.MIMIC-IV DiReCT.PDD.SFT.json",
    # "108.MIMIC-IV DiReCT.Dis": "dataset_raw/108.MIMIC-IV DiReCT.Dis.SFT.json",
    # "107.MIMIC-IV BHC": "dataset_raw/107.MIMIC-IV BHC.SFT.json",
    # "100.GraSSCo_PHI": "dataset_raw/100.GraSSCo_PHI.SFT.json",
    # only few-shot
    "99.CARDIO:DE": "dataset_raw/99.CARDIO:DE.SFT.json",
}

### process for each task

In [17]:
def proceed_task(dict_task_path, model_name, prompt_mode, split="test"):
    for task_name, path_file_task in dict_task_path.items():
        list_dict_data_batch = create_azure_batch_data(
            task_name=task_name,
            model_name=model_name,
            prompt_mode=prompt_mode,
            split=split,
            path_dir_raw="dataset_raw",
            path_dir_batch=path_dir_batch,
            temperature=0,
            top_p=0,
            frequency_penalty=0,
            presence_penalty=0,
            max_token_input=100 * 1024,
            max_token_output=2 * 1024,
        )
        print("========================================")

In [18]:
model_name = "gpt-35-turbo-batch"
# proceed_task(dict_task_path, model_name, prompt_mode="direct")
# proceed_task(dict_task_path, model_name, prompt_mode="cot")
proceed_task(dict_task_path, model_name, prompt_mode="direct-5-shot")

Task: 99.CARDIO:DE
 - Test split: 369 samples
 - Model: gpt-35-turbo-batch
 - Prompt: direct-5-shot
 - Prepare 5 examples
 - Loading tokenizer of gpt-35-turbo
 - Max token input: 14331
 - Max token output: 2048
 - Save 369 to azure/input_supp_again_again/gpt-35-turbo-batch/direct-5-shot/99.CARDIO:DE.batch.jsonl


In [19]:
model_name = "gpt-4o-batch"
# proceed_task(dict_task_path, model_name, prompt_mode="direct")
# proceed_task(dict_task_path, model_name, prompt_mode="cot")
proceed_task(dict_task_path, model_name, prompt_mode="direct-5-shot")

Task: 99.CARDIO:DE
 - Test split: 369 samples
 - Model: gpt-4o-batch
 - Prompt: direct-5-shot
 - Prepare 5 examples
 - Loading tokenizer of gpt-4o
 - Max token input: 102400
 - Max token output: 2048
 - Save 369 to azure/input_supp_again_again/gpt-4o-batch/direct-5-shot/99.CARDIO:DE.batch.jsonl


### merge

In [None]:
model_name = "gpt-35-turbo-batch"
# prompt_mode = "direct"
# merge_azure_batch_data(model_name, prompt_mode, path_dir_raw=path_dir_batch, path_dir_merged=f"{path_dir_batch}/merged", max_lines=50000, max_size_mb=190)
# prompt_mode = "cot"
# merge_azure_batch_data(model_name, prompt_mode, path_dir_raw=path_dir_batch, path_dir_merged=f"{path_dir_batch}/merged", max_lines=50000, max_size_mb=190)
prompt_mode = "direct-5-shot"
merge_azure_batch_data(model_name, prompt_mode, path_dir_raw=path_dir_batch, path_dir_merged=f"{path_dir_batch}/merged", max_lines=50000, max_size_mb=190)

Created: azure/input_supp_again_again/merged/gpt-35-turbo-batch.direct.chunk_0.jsonl with 22032 lines and size 41.85 MB
Created: azure/input_supp_again_again/merged/gpt-35-turbo-batch.cot.chunk_0.jsonl with 22032 lines and size 43.41 MB
Created: azure/input_supp_again_again/merged/gpt-35-turbo-batch.direct-5-shot.chunk_0.jsonl with 22032 lines and size 178.06 MB
Created: azure/input_supp_again_again/merged/gpt-35-turbo-batch.direct-5-shot.chunk_1.jsonl with 1819 lines and size 25.76 MB


In [31]:
model_name = "gpt-4o-batch"
prompt_mode = "direct"
merge_azure_batch_data(model_name, prompt_mode, path_dir_raw=path_dir_batch, path_dir_merged=f"{path_dir_batch}/merged", max_lines=50000, max_size_mb=190)
prompt_mode = "cot"
merge_azure_batch_data(model_name, prompt_mode, path_dir_raw=path_dir_batch, path_dir_merged=f"{path_dir_batch}/merged", max_lines=50000, max_size_mb=190)
prompt_mode = "direct-5-shot"
merge_azure_batch_data(model_name, prompt_mode, path_dir_raw=path_dir_batch, path_dir_merged=f"{path_dir_batch}/merged", max_lines=50000, max_size_mb=190)

Created: azure/input_supp_again_again/merged/gpt-4o-batch.direct.chunk_0.jsonl with 22032 lines and size 41.62 MB
Created: azure/input_supp_again_again/merged/gpt-4o-batch.cot.chunk_0.jsonl with 22032 lines and size 43.17 MB
Created: azure/input_supp_again_again/merged/gpt-4o-batch.direct-5-shot.chunk_0.jsonl with 22032 lines and size 182.01 MB
Created: azure/input_supp_again_again/merged/gpt-4o-batch.direct-5-shot.chunk_1.jsonl with 1819 lines and size 25.74 MB


### parse result

In [12]:
path_dir_azure_result = "azure/output_99"
path_dir_task_save = "result"
# "result_supp_again_again"
path_dir_raw="dataset_raw"
path_dir_batch="azure/input_supp_again_again/"

In [13]:
process_azure_result_to_task_result(
    path_dir_azure_result=path_dir_azure_result,
    path_dir_task_save=path_dir_task_save,
    path_dir_raw=path_dir_raw,
    path_dir_batch=path_dir_batch,
)

Task: 99.CARDIO:DE
 - Model Name: gpt-4o-batch
 - Prompt Mode: direct-5-shot
 - Split: test
----------------------------------------
 - Num of data: 369
 - Num of batch: 369
 - Num of result: 369
 - Matched: 369
 - All matched.
 - Saved: result/99.CARDIO:DE/gpt-4o/99.CARDIO:DE-direct-5-shot-greedy-42.result.json
Task: 99.CARDIO:DE
 - Model Name: gpt-35-turbo-batch
 - Prompt Mode: direct-5-shot
 - Split: test
----------------------------------------
 - Num of data: 369
 - Num of batch: 369
 - Num of result: 369
 - Matched: 369
 - All matched.
 - Saved: result/99.CARDIO:DE/gpt-35-turbo/99.CARDIO:DE-direct-5-shot-greedy-42.result.json


## End

In [1]:
print('Done.')

Done.
