In [1]:
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline
)

In [2]:
import json
# Path to your updated JSON file
updated_json_file_path = '/root/llama/datasets/classification_task_new_labels.json'  

# Read the updated JSON file
with open(updated_json_file_path, 'r', encoding='utf-8') as file:
    updated_data = json.load(file)

# Number of entries to display
num_entries_to_display = 5

# Print the first few entries to check the labels
for entry in updated_data[:num_entries_to_display]:
    print(f"Label: {entry['label']}")
    print(f"Text: {entry['clinical_conditions']}\n")

Label: cardiovascular diseases
Text: Catheterization laboratory events and hospital outcome with direct angioplasty for acute myocardial infarction To assess the safety of direct infarct angioplasty without antecedent thrombolytic therapy, catheterization laboratory and hospital events were assessed in consecutively treated patients with infarctions involving the left anterior descending (n = 100 patients), right (n = 100), and circumflex (n = 50) coronary arteries. The groups of patients were similar for age (left anterior descending coronary artery, 59 years; right coronary artery, 58 years; circumflex coronary artery, 62 years), patients with multivessel disease (left anterior descending coronary artery, 55%; right coronary artery, 55%; circumflex coronary artery, 64%), and patients with initial grade 0/1 antegrade flow (left anterior descending coronary artery, 79%; right coronary artery, 84%; circumflex coronary artery, 90%). Cardiogenic shock was present in eight patients with in

In [3]:
from datasets import load_dataset
updated_json_file_path = '/root/llama/datasets/classification_task_new_labels.json'  

# Loading the dataset
dataset = load_dataset('json', data_files=updated_json_file_path)
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['clinical_conditions', 'label'],
        num_rows: 14438
    })
})


In [4]:
# Model loading
base_model_name = "meta-llama/Llama-2-7b-chat-hf"
model = AutoModelForCausalLM.from_pretrained(base_model_name)

# Convert model to use 16-bit floating point precision (half precision)
model = model.half()  # This converts all the model weights to float16

# Tokenizer
llama_tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
llama_tokenizer.pad_token = llama_tokenizer.eos_token
llama_tokenizer.padding_side = "right" 

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [6]:
#this cell has examples that are not used

#prompts = [
#    "Medical Diagnosis Task: You are presented with a clinical description. Your task is to identify the most appropriate medical category for diagnosis from the following options:",
#    "1. Neoplasms",
#    "2. Digestive System Diseases",
#    "3. Nervous System Diseases",
#    "4. Cardiovascular Diseases",
#    "5. General Pathological Conditions",
#    "First, carefully analyze the clinical description to identify main keywords related to medical conditions you can recognize.",
#    "Next, consider the provided categories and map the correct one based on your knowledge.",
#    "Your response should only include the diagnosis by clearly stating the category and nothing else.",
#    "No explanation is needed.",
#    "Ensure your answer matches one of the provided diagnosis/categories.",
#    "Clinical Description: '{dataset['train'][i]['clinical_conditions']}'\nDiagnosis: "
#    for i in range(20)
#]

#prompts = [
#    f"Medical Diagnosis Task: You are presented with a clinical description. Your task is to identify the most appropriate medical category for diagnosis from the following options:\n1. Neoplasms\n2. Digestive System Diseases\n3. Nervous System Diseases\n4. Cardiovascular Diseases\n5. General Pathological Conditions\n\nFirst, carefully analyze the clinical description to identify main keywords related to medical conditions you can recognize.\nNext, consider the provided categories and map the correct one based on your knowledge.\nYour response should only include the diagnosis by clearly stating the category and nothing else.\nNo explanation is needed.\nEnsure your answer matches one of the provided diagnosis/categories.\nClinical Description: '{dataset['train'][i]['clinical_conditions']}'\nDiagnosis: "
#    for i in range(20)
#]


In [35]:
#this prompt is used in chain of thoughts
prompts = [
    f"Medical Diagnosis Task: You are presented with a clinical description. Your goal is to assign one of the following labels to the description:\n1. Neoplasms\n2. Digestive System Diseases\n3. Nervous System Diseases\n4. Cardiovascular Diseases\n5. General Pathological Conditions\n\nAnalyze the clinical description and identify keywords or phrases that match one of the labels.\nMap the keywords to the most appropriate label.\nYour response should only include the selected label.\nClinical Description: '{clinical_description}'\nDiagnosis: "
    for clinical_description in [dataset['train'][i]['clinical_conditions'] for i in range(20)]
]


In [45]:
#this prompt is used in few shot learning
#prompts = [
#    f"Medical Diagnosis Task: You are presented with a clinical description. Your goal is to assign one of the following labels to the description:\n1. Neoplasms\n2. Digestive System Diseases\n3. Nervous System Diseases\n4. Cardiovascular Diseases\n5. General Pathological Conditions\n\nAnalyze the clinical description and identify keywords or phrases that match one of the labels.\nMap the keywords to the most appropriate label.\nYour response should only include the selected label.\nClinical Description: '{clinical_description}'\nDiagnosis: "
#    for clinical_description in [dataset['train'][i]['clinical_conditions'] for i in range(20)]
#]

prompts = [
    f"Medical Diagnosis Task: You are presented with a clinical description. Your goal is to assign one of the following labels to the description:\n"
    "1. Neoplasms\n"
    "2. Digestive System Diseases\n"
    "3. Nervous System Diseases\n"
    "4. Cardiovascular Diseases\n"
    "5. General Pathological Conditions\n\n"
    "Your response should only include the selected label. \n"
    "Here are a couple of examples:\n"
    "- Example 1: A study investigates catheterization laboratory events and hospital outcomes in patients with acute myocardial infarction. It focuses on direct infarct angioplasty without thrombolytic therapy, highlighting major events and their association with different coronary arteries. The results emphasize the impact of cardiogenic shock on these events. (Correct Label: Cardiovascular Diseases)\n"
    "- Example 2: Clinical conditions discuss renal abscesses in children, presenting cases and reviewing additional pediatric cases over the past ten years. It describes common presenting features and challenges in diagnosis. The text also mentions the identification of causative organisms and treatment options. (Correct Label: General Pathological Conditions)\n\n"
    f"Clinical Description: '{clinical_description}'\nDiagnosis: "
    for clinical_description in [dataset['train'][i]['clinical_conditions'] for i in range(50)]
]

In [8]:
import torch

# Checking for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Print the device being used
print(f"Using device: {device}")

# Check if the model is wrapped in DataParallel and unwrap it if necessary
if isinstance(model, torch.nn.DataParallel):
    # Extract the original model from DataParallel wrapper
    unwrapped_model = model.module
else:
    unwrapped_model = model

# Move the unwrapped model to the defined device (GPU or CPU)
unwrapped_model.to(device)

Using device: cuda


LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 4096, padding_idx=0)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
    )
    (norm): LlamaRMSNo

In [46]:
model_outputs = []
for prompt in prompts:
    input_ids = llama_tokenizer.encode(prompt, return_tensors='pt').to(device)
    output_ids = unwrapped_model.generate(input_ids, max_length=512, temperature=0.4, top_p=0.6)
    generated_text = llama_tokenizer.decode(output_ids[0], skip_special_tokens=True)

    # Post-processing to remove the echoed prompt
    if generated_text.startswith(prompt):
        generated_text = generated_text[len(prompt):].strip()

    # Append the generated text directly
    model_outputs.append(generated_text)

Input length of input_ids is 741, but `max_length` is set to 512. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.
Input length of input_ids is 533, but `max_length` is set to 512. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.
Input length of input_ids is 583, but `max_length` is set to 512. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.
Input length of input_ids is 848, but `max_length` is set to 512. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.
Input length of input_ids is 574, but `max_length` is set to 512. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.
Input length of input_ids is 683, but `max_length` is set to 512. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.
Input length of input_ids is 540, but `max_length` is set to 512. This can lead to

In [47]:
#extract generated plus actual summaries
generated_summaries = model_outputs
actual_labels = [dataset['train'][i]['label'] for i in range(50)]

In [52]:
index = 2  

print(f"Prompt:\n{prompts[index]}\n")
print(f"Generated Label:\n{generated_summaries[index]}\n")
print(f"Actual Label:\n{actual_labels[index]}\n")
print("---------------------------------------------------\n")


Prompt:
Medical Diagnosis Task: You are presented with a clinical description. Your goal is to assign one of the following labels to the description:
1. Neoplasms
2. Digestive System Diseases
3. Nervous System Diseases
4. Cardiovascular Diseases
5. General Pathological Conditions

Your response should only include the selected label. 
Here are a couple of examples:
- Example 1: A study investigates catheterization laboratory events and hospital outcomes in patients with acute myocardial infarction. It focuses on direct infarct angioplasty without thrombolytic therapy, highlighting major events and their association with different coronary arteries. The results emphasize the impact of cardiogenic shock on these events. (Correct Label: Cardiovascular Diseases)
- Example 2: Clinical conditions discuss renal abscesses in children, presenting cases and reviewing additional pediatric cases over the past ten years. It describes common presenting features and challenges in diagnosis. The text 

In [50]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Define the label mapping
label_mapping = {
    '1': 'neoplasms',
    '2': 'digestive system diseases',
    '3': 'nervous system diseases',
    '4': 'cardiovascular diseases',
    '5': 'general pathological conditions',
    'neoplasms': 'neoplasms',
    'digestive system diseases': 'digestive system diseases',
    'nervous system diseases': 'nervous system diseases',
    'cardiovascular diseases': 'cardiovascular diseases',
    'general pathological conditions': 'general pathological conditions',
    '1. neoplasms': 'neoplasms',
    '2. digestive system diseases': 'digestive system diseases',
    '3. nervous system diseases': 'nervous system diseases',
    '4. cardiovascular diseases': 'cardiovascular diseases',
    '5. general pathological conditions': 'general pathological conditions',
    '1 - neoplasms': 'neoplasms',
    '2 - digestive system diseases': 'digestive system diseases',
    '3 - nervous system diseases': 'nervous system diseases',
    '4 - cardiovascular diseases': 'cardiovascular diseases',
    '5 - general pathological conditions': 'general pathological conditions',
    '1: neoplasms': 'neoplasms',
    '2: digestive system diseases': 'digestive system diseases',
    '3: nervous system diseases': 'nervous system diseases',
    '4: cardiovascular diseases': 'cardiovascular diseases',
    '5: general pathological conditions': 'general pathological conditions',
}

# The relevant parts from each generated output are extracted and and mapped to actual labels
mapped_labels = []

for output in generated_summaries:
    matches = re.findall(r'(1|2|3|4|5|[A-Za-z\s]+|[1-5][.: -]\s*[A-Za-z\s]+)', output)
    mapped_label = None

    for match in matches:
        match = match.strip()
        if match in label_mapping:
            mapped_label = label_mapping[match]
            break

    if mapped_label:
        mapped_labels.append(mapped_label)
    else:
        mapped_labels.append("Unknown")

# Filter out "Unknown" responses from the actual_labels
filtered_actual_labels = [label for label in actual_labels if label != "Unknown"]

# Calculate accuracy
accuracy = accuracy_score(filtered_actual_labels, mapped_labels)

# Calculate precision, recall, and F1-score
precision = precision_score(filtered_actual_labels, mapped_labels, average='weighted')
recall = recall_score(filtered_actual_labels, mapped_labels, average='weighted')
f1 = f1_score(filtered_actual_labels, mapped_labels, average='weighted')

# Print or use the calculated metrics as needed
print("Accuracy:", accuracy)
print("Weighted Precision:", precision)
print("Weighted Recall:", recall)
print("Weighted F1:", f1)


Accuracy: 0.44
Weighted Precision: 0.292972972972973
Weighted Recall: 0.44
Weighted F1: 0.34620689655172415


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
