In [1]:
from datetime import datetime
import time 
import gc

from llm_utils import LLMModel 

  from .autonotebook import tqdm as notebook_tqdm


## n2c2_2008 Intuitive Model Comparison Tasks

#### STARTING SETUP

In [None]:
# Run configuration for the "intuitive" task family on server2.
# The log file is named after today's date (YYYY-MM-DD) so each day's runs
# are appended to their own file.
date_str = datetime.now().date().isoformat()
log_file_name = '-'.join([
    'server2',
    date_str,
    'n2c2-tasks-2008-intuitive-llm-model-runs.txt',
])

# Names passed one at a time to the model's prediction call further down.
example_names = ['example_A']

#### MISTRAL7B-INSTRUCT

In [3]:
# Wall-clock reference point for this model; later cells subtract it from
# time.time() to report setup time and total run time.
model_start = time.time()

# Append (never truncate) so that repeated runs on the same day accumulate
# in one log file.
with open(log_file_name, 'a') as f:
    f.write('Starting to run server2 models on n2c2_2008 data... \n')
    # Fixed: this line previously said "n2c2_2018", but this notebook runs the
    # n2c2_2008 tasks (see the config file and every other log message).
    f.write('Running mistral7B instruct model on n2c2_2008 data... \n')

In [4]:
# Build the Mistral-7B-Instruct wrapper for the sixteen "intuitive_*" tasks.
# Each task name is the prefix "intuitive_" followed by one condition label;
# the order below matches the original hand-written list.
mistral7b_instruct_model = LLMModel(
    config_file='./model-configs/mistral7b_instruct_n2c2_2008.yml',
    title_prefix='3-n2c2_2008-intuitive-few-shot-mistral-7b-instruct',
    task_names=[
        f'intuitive_{condition}'
        for condition in (
            'Asthma',
            'CAD',
            'CHF',
            'Depression',
            'Diabetes',
            'Gallstones',
            'GERD',
            'Gout',
            'Hypertension',
            'Hypercholesterolemia',
            'Hypertriglyceridemia',
            'OA',
            'Obesity',
            'OSA',
            'PVD',
            'Venous Insufficiency',
        )
    ],
    is_multiclass=True,
)

# Must run before any prediction call below; judging by the captured output it
# logs the parameters, prints the model, and loads a retriever.
mistral7b_instruct_model.setupModelForPredictions()

2024-03-02 06:12:54,136 - n2c2 - INFO - Parameters: 
{device: {device: cuda, load_8bit: true, num_gpus: 1}, general: {resources: {section_titles: resources/n2c2_sections_subpart.txt},
    test: {dir: data/n2c2_2008_data, files: [n2c2_2008_test.csv]}, train: {dir: data/n2c2_2008_data,
      files: [n2c2_2008_train.csv]}, val: {dir: data/n2c2_2008_data, files: [n2c2_2008_val.csv]}},
  generation: {echo: false, judge_sent_end: false, max_new_tokens: 256, model_path: mistralai/Mistral-7B-Instruct-v0.1,
    repetition_penalty: 1.0, temperature: 0}, intuitive_Asthma: {few_shot: true},
  intuitive_CAD: {few_shot: true}, intuitive_CHF: {few_shot: true}, intuitive_Depression: {
    few_shot: true}, intuitive_Diabetes: {few_shot: true}, intuitive_GERD: {few_shot: true},
  intuitive_Gallstones: {few_shot: true}, intuitive_Gout: {few_shot: true}, intuitive_Hypercholesterolemia: {
    few_shot: true}, intuitive_Hypertension: {few_shot: true}, intuitive_Hypertriglyceridemia: {
    few_shot: true}, i

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralAttention(
          (q_proj): CLinear()
          (k_proj): CLinear()
          (v_proj): CLinear()
          (o_proj): CLinear()
          (rotary_emb): MistralRotaryEmbedding()
        )
        (mlp): MistralMLP(
          (gate_proj): CLinear()
          (up_proj): CLinear()
          (down_proj): CLinear()
          (act_fn): SiLUActivation()
        )
        (input_layernorm): MistralRMSNorm()
        (post_attention_layernorm): MistralRMSNorm()
      )
    )
    (norm): MistralRMSNorm()
  )
  (lm_head): CLinear()
)


2024-03-02 06:14:26,504 - n2c2 - INFO - Load retriever


In [None]:
# Time elapsed between the "model_start" cell and now, i.e. model construction
# and setup (plus any idle time between cell executions).
temp_start = time.time()
elapsed_time = temp_start - model_start

with open(log_file_name, 'a') as f:
    f.write(f'Finished setting up mistral7b_instruct_model model on n2c2_2008 data: {elapsed_time}\n')

for example_name in example_names:
    # Fixed: the elapsed time used to be computed *before* the prediction call,
    # so the log always reported ~0 seconds. Start the clock right before the
    # call and stop it right after so the logged value is the actual run time.
    temp_start = time.time()

    mistral7b_instruct_model.load2006and2008ModelPredictions(example_name)

    temp_end = time.time()
    elapsed_time = temp_end - temp_start

    with open(log_file_name, 'a') as f:
        f.write(f'Finished running {example_name} for mistral7b_instruct_model model on n2c2_2008 data: {elapsed_time}\n')

# Total wall-clock time for this model, measured from the "model_start" cell.
model_end = time.time()
elapsed_time = model_end - model_start

with open(log_file_name, 'a') as f:
    f.write(f'Finished running mistral7b_instruct_model model on n2c2_2008 data: {elapsed_time}\n')

# Drop the notebook's reference to the model and force a collection pass
# (presumably to release memory before another model is loaded -- confirm).
del mistral7b_instruct_model
gc.collect()

2024-03-02 06:14:38,808 - n2c2 - INFO - Load data
2024-03-02 06:14:38,865 - n2c2 - INFO - Load data
2024-03-02 06:14:38,885 - n2c2 - INFO - Load data
  prediction_df.loc[prediction_df['id'] == text_id, task_name] = pred_answer
 24%|██▎       | 103/438 [07:33<24:46,  4.44s/it]

#### ENDING CLEANUP

In [None]:
# Final log entry for the server2 / intuitive run: total time since model_start.
end = time.time()
elapsed_time = end - model_start

with open(log_file_name, 'a') as f:
    # Fixed: the message said "textual tasks" although this section runs the
    # intuitive tasks (see log_file_name / title_prefix / task_names).
    # Fixed: terminate the line with '\n'; the file is opened in append mode,
    # so without it the next run's first message is glued onto this line.
    f.write(f'Finished running models on server 2 on n2c2_2008 data for intuitive tasks: {elapsed_time}\n')


## n2c2_2008 Textual Model Comparison Tasks

#### STARTING SETUP

In [None]:
# Run configuration for the "textual" task family on server1.
# The log file is named after today's date (YYYY-MM-DD) so each day's runs
# are appended to their own file.
date_str = datetime.now().date().isoformat()
log_file_name = '-'.join([
    'server1',
    date_str,
    'n2c2-tasks-2008-textual-llm-model-runs.txt',
])

# Names passed one at a time to the model's prediction call further down.
example_names = ['example_A']

#### MISTRAL7B-INSTRUCT

In [None]:
# Wall-clock reference point for this model; later cells subtract it from
# time.time() to report setup time and total run time.
model_start = time.time()

# Append (never truncate) so that repeated runs on the same day accumulate
# in one log file.
with open(log_file_name, 'a') as f:
    f.write('Starting to run server1 models on n2c2_2008 data... \n')
    # Fixed: this line previously said "n2c2_2018", but this notebook runs the
    # n2c2_2008 tasks (see the config file and every other log message).
    f.write('Running mistral7B instruct model on n2c2_2008 data... \n')

In [None]:
# Build the Mistral-7B-Instruct wrapper for the sixteen "textual_*" tasks.
# Each task name is the prefix "textual_" followed by one condition label;
# the order below matches the original hand-written list.
# NOTE(review): the (truncated) parameter dump logged by setup only shows
# intuitive_* entries from this shared config file -- confirm the textual_*
# tasks are configured as intended.
mistral7b_instruct_model = LLMModel(
    config_file='./model-configs/mistral7b_instruct_n2c2_2008.yml',
    title_prefix='3-n2c2_2008-textual-few-shot-mistral-7b-instruct',
    task_names=[
        f'textual_{condition}'
        for condition in (
            'Asthma',
            'CAD',
            'CHF',
            'Depression',
            'Diabetes',
            'Gallstones',
            'GERD',
            'Gout',
            'Hypertension',
            'Hypercholesterolemia',
            'Hypertriglyceridemia',
            'OA',
            'Obesity',
            'OSA',
            'PVD',
            'Venous Insufficiency',
        )
    ],
    is_multiclass=True,
)

# Must run before any prediction call below; judging by the captured output it
# logs the parameters, prints the model, and loads a retriever.
mistral7b_instruct_model.setupModelForPredictions()

2024-03-02 06:02:27,679 - n2c2 - INFO - Parameters: 
{device: {device: cuda, load_8bit: true, num_gpus: 1}, general: {resources: {section_titles: resources/n2c2_sections_subpart.txt},
    test: {dir: data/n2c2_2008_data, files: [n2c2_2008_test.csv]}, train: {dir: data/n2c2_2008_data,
      files: [n2c2_2008_train.csv]}, val: {dir: data/n2c2_2008_data, files: [n2c2_2008_val.csv]}},
  generation: {echo: false, judge_sent_end: false, max_new_tokens: 256, model_path: mistralai/Mistral-7B-Instruct-v0.1,
    repetition_penalty: 1.0, temperature: 0}, intuitive_Asthma: {few_shot: true},
  intuitive_CAD: {few_shot: true}, intuitive_CHF: {few_shot: true}, intuitive_Depression: {
    few_shot: true}, intuitive_Diabetes: {few_shot: true}, intuitive_GERD: {few_shot: true},
  intuitive_Gallstones: {few_shot: true}, intuitive_Gout: {few_shot: true}, intuitive_Hypercholesterolemia: {
    few_shot: true}, intuitive_Hypertension: {few_shot: true}, intuitive_Hypertriglyceridemia: {
    few_shot: true}, i

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralAttention(
          (q_proj): CLinear()
          (k_proj): CLinear()
          (v_proj): CLinear()
          (o_proj): CLinear()
          (rotary_emb): MistralRotaryEmbedding()
        )
        (mlp): MistralMLP(
          (gate_proj): CLinear()
          (up_proj): CLinear()
          (down_proj): CLinear()
          (act_fn): SiLUActivation()
        )
        (input_layernorm): MistralRMSNorm()
        (post_attention_layernorm): MistralRMSNorm()
      )
    )
    (norm): MistralRMSNorm()
  )
  (lm_head): CLinear()
)


2024-03-02 06:06:59,183 - n2c2 - INFO - Load retriever


In [None]:
# Time elapsed between the "model_start" cell and now, i.e. model construction
# and setup (plus any idle time between cell executions).
temp_start = time.time()
elapsed_time = temp_start - model_start

with open(log_file_name, 'a') as f:
    f.write(f'Finished setting up mistral7b_instruct_model model on n2c2_2008 data: {elapsed_time}\n')

for example_name in example_names:
    # Fixed: the elapsed time used to be computed *before* the prediction call,
    # so the log always reported ~0 seconds. Start the clock right before the
    # call and stop it right after so the logged value is the actual run time.
    temp_start = time.time()

    mistral7b_instruct_model.load2006and2008ModelPredictions(example_name)

    temp_end = time.time()
    elapsed_time = temp_end - temp_start

    with open(log_file_name, 'a') as f:
        f.write(f'Finished running {example_name} for mistral7b_instruct_model model on n2c2_2008 data: {elapsed_time}\n')

# Total wall-clock time for this model, measured from the "model_start" cell.
model_end = time.time()
elapsed_time = model_end - model_start

with open(log_file_name, 'a') as f:
    f.write(f'Finished running mistral7b_instruct_model model on n2c2_2008 data: {elapsed_time}\n')

# Drop the notebook's reference to the model and force a collection pass
# (presumably to release memory before another model is loaded -- confirm).
del mistral7b_instruct_model
gc.collect()

2024-03-02 06:08:37,799 - n2c2 - INFO - Load data
2024-03-02 06:08:37,976 - n2c2 - INFO - Load data
2024-03-02 06:08:38,048 - n2c2 - INFO - Load data
  prediction_df.loc[prediction_df['id'] == text_id, task_name] = pred_answer
 42%|████▏     | 185/438 [13:34<18:56,  4.49s/it]

#### ENDING CLEANUP

In [None]:
# Final log entry for the server1 / textual run: total time since model_start.
end = time.time()
elapsed_time = end - model_start

with open(log_file_name, 'a') as f:
    # Fixed: terminate the line with '\n' like every other log write; the file
    # is opened in append mode, so without it the next run's first message is
    # glued onto this line.
    f.write(f'Finished running models on server 1 on n2c2_2008 data for textual tasks: {elapsed_time}\n')
